├── .gitignore
├── Lib
    └── compositor
    │   ├── __init__.py
    │   ├── caseConversionMaps.py
    │   ├── classDefinitionTables.py
    │   ├── cmap.py
    │   ├── error.py
    │   ├── featureList.py
    │   ├── font.py
    │   ├── glyphRecord.py
    │   ├── layoutEngine.py
    │   ├── logger.py
    │   ├── lookupList.py
    │   ├── scriptList.py
    │   ├── subTablesBase.py
    │   ├── subTablesGPOS.py
    │   ├── subTablesGSUB.py
    │   ├── tables.py
    │   ├── textUtilities.py
    │   └── wordBreakProperties.py
├── License.txt
├── MANIFEST.in
├── README.md
├── demo.py
├── pyproject.toml
├── setup.py
├── todo.txt
└── tools
    ├── PropList.txt
    ├── SpecialCasing.txt
    ├── UnicodeData.txt
    ├── UnicodeReferenceGenerator.py
    └── WordBreakProperty.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.egg-info/
3 | __pycache__/
4 | *.pyc


--------------------------------------------------------------------------------
/Lib/compositor/__init__.py:
--------------------------------------------------------------------------------
1 | from compositor.error import CompositorError
2 | from compositor.layoutEngine import LayoutEngine
3 | from compositor.font import Font, Info, Glyph
4 | 
5 | version = "0.3b"
6 | 


--------------------------------------------------------------------------------
/Lib/compositor/classDefinitionTables.py:
--------------------------------------------------------------------------------
 1 | class ClassDef(object):
 2 | 
 3 |     """
 4 |     Deviation from spec:
 5 |     - StartGlyph attribute is not implemented.
 6 |     - GlyphCount attribute is not implemented.
 7 |     - ClassValueArray attribute is not implemented.
 8 | 
 9 |     The structure of this object does not closely
10 |     follow the specification. Instead, the basic
11 |     functionality is implemented through standard
12 |     dict methods.
13 | 
14 |     To determine if a glyph is in the class:
15 |         >>> "x" in aClass
16 |         True
17 | 
18 |     To get the class value of a particular glyph:
19 |         >>> aClass["x"]
20 |         330
21 |     """
22 | 
23 |     __slots__ = ["_map"]
24 | 
25 |     def __init__(self):
26 |         self._map = None
27 | 
28 |     def loadFromFontTools(self, classDef):
29 |         self._map = dict(classDef.classDefs)
30 |         return self
31 | 
32 |     def __getitem__(self, glyphName):
33 |         return self._map.get(glyphName, 0)
34 | 
35 |     def _get_Glyphs(self):
36 |         return self._map
37 | 
38 |     Glyphs = property(_get_Glyphs, doc="This is for reference only. Not for use in processing.")
39 | 
40 | 
class GlyphClassDef(ClassDef):

    """
    This is a subclass of ClassDef. It adds no behavior
    of its own.

    Retrieving the class for a glyph from this
    object will always return a value. If the
    glyph is not in the class definitions,
    zero will be returned.
    """
51 | 
52 | 
class MarkAttachClassDef(ClassDef):

    """
    This is a subclass of ClassDef. It adds no behavior
    of its own.

    Retrieving the class for a glyph from this
    object will always return a value. If the
    glyph is not in the class definitions,
    zero will be returned.
    """
63 | 


--------------------------------------------------------------------------------
/Lib/compositor/cmap.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utilities for handling the cmap table
 3 | and character mapping in general.
 4 | """
 5 | 
 6 | def extractCMAP(ttFont):
 7 |     for platformID, encodingID in [(3, 10), (0, 3), (3, 1)]:
 8 |         cmapSubtable = ttFont["cmap"].getcmap(platformID, encodingID)
 9 |         if cmapSubtable is not None:
10 |             return cmapSubtable.cmap
11 |     from compositor.error import CompositorError
12 |     raise CompositorError("Found neither CMAP (3, 10), (0, 3), nor (3, 1) in font.")
13 | 
def reverseCMAP(cmap):
    """
    Invert a {value : glyph name} mapping into a
    {glyph name : [values]} dict. A glyph name that is
    referenced by several values collects all of them,
    in the iteration order of cmap.
    """
    # named so that the builtin reversed() is not shadowed
    glyphNameToValues = {}
    for value, name in cmap.items():
        glyphNameToValues.setdefault(name, []).append(value)
    return glyphNameToValues
21 | 


--------------------------------------------------------------------------------
/Lib/compositor/error.py:
--------------------------------------------------------------------------------
class CompositorError(Exception):
    """Exception type raised by the compositor package."""


--------------------------------------------------------------------------------
/Lib/compositor/featureList.py:
--------------------------------------------------------------------------------
 1 | """
 2 | FeatureList object (and friends).
 3 | """
 4 | 
 5 | 
 6 | __all__ = ["FeatureList", "FeatureRecord"]
 7 | 
 8 | 
 9 | class FeatureList(object):
10 | 
11 |     __slots__ = ["FeatureCount", "FeatureRecord"]
12 | 
13 |     def __init__(self):
14 |         self.FeatureCount = 0
15 |         self.FeatureRecord = []
16 | 
17 |     def loadFromFontTools(self, featureList):
18 |         self.FeatureCount = featureList.FeatureCount
19 |         self.FeatureRecord = []
20 |         self.FeatureRecord = [FeatureRecord().loadFromFontTools(record) for record in featureList.FeatureRecord]
21 |         return self
22 | 
23 | 
class FeatureRecord(object):

    """
    A feature tag paired with its Feature object.
    """

    __slots__ = ["FeatureTag", "Feature"]

    def __init__(self):
        self.FeatureTag = self.Feature = None

    def loadFromFontTools(self, featureRecord):
        """
        Copy the tag from featureRecord and wrap featureRecord.Feature
        in a Feature object. Returns self.
        """
        self.FeatureTag = featureRecord.FeatureTag
        wrapped = Feature()
        self.Feature = wrapped.loadFromFontTools(featureRecord.Feature)
        return self
36 | 
37 | 
class Feature(object):

    """
    The parameters and lookup indexes of a single feature.
    """

    __slots__ = ["FeatureParams", "LookupCount", "LookupListIndex"]

    def __init__(self):
        self.FeatureParams, self.LookupCount = None, 0
        self.LookupListIndex = []

    def loadFromFontTools(self, feature):
        """
        Copy FeatureParams, LookupCount and LookupListIndex from
        feature. The lookup indexes are stored in a new list.
        Returns self.
        """
        # the params object is stored as is, it is not converted
        self.FeatureParams = feature.FeatureParams
        self.LookupCount = feature.LookupCount
        self.LookupListIndex = [index for index in feature.LookupListIndex]
        return self
52 | 
53 | 


--------------------------------------------------------------------------------
/Lib/compositor/font.py:
--------------------------------------------------------------------------------
  1 | from __future__ import unicode_literals
  2 | import weakref
  3 | from fontTools.ttLib import TTFont
  4 | from fontTools.pens.basePen import AbstractPen
  5 | from fontTools.misc.textTools import tostr
  6 | from compositor.layoutEngine import LayoutEngine
  7 | from compositor.glyphRecord import GlyphRecord
  8 | from compositor.cmap import extractCMAP
  9 | from compositor.error import CompositorError
 10 | 
 11 | 
 12 | class Font(LayoutEngine):
 13 | 
 14 |     def __init__(self, path, glyphClass=None):
 15 |         super(Font, self).__init__()
 16 |         self.path = path
 17 |         self._glyphs = {}
 18 |         if isinstance(path, TTFont):
 19 |             self.source = path
 20 |         else:
 21 |             self.source = TTFont(path)
 22 |         self.loadGlyphSet()
 23 |         self.loadCMAP()
 24 |         self.loadFeatures()
 25 |         self.loadInfo()
 26 |         if glyphClass is None:
 27 |             glyphClass = Glyph
 28 |         self.glyphClass = glyphClass
 29 | 
 30 |     def __del__(self):
 31 |         del self._glyphs
 32 |         self.source.close()
 33 |         del self.source
 34 | 
 35 |     # --------------
 36 |     # initialization
 37 |     # --------------
 38 | 
 39 |     def loadCMAP(self):
 40 |         cmap = extractCMAP(self.source)
 41 |         self.setCMAP(cmap)
 42 | 
 43 |     def loadGlyphSet(self):
 44 |         self.glyphSet = self.source.getGlyphSet()
 45 |         # the glyph order will be needed later
 46 |         # to assign the proper glyph index to
 47 |         # glyph objects.
 48 |         order = self.source.getGlyphOrder()
 49 |         self._glyphOrder = {}
 50 |         for index, glyphName in enumerate(order):
 51 |             self._glyphOrder[glyphName] = index
 52 | 
 53 |     def loadInfo(self):
 54 |         self.info = info = Info()
 55 |         head = self.source["head"]
 56 |         hhea = self.source["hhea"]
 57 |         os2 = self.source["OS/2"]
 58 |         info.unitsPerEm = head.unitsPerEm
 59 |         info.ascender = hhea.ascent
 60 |         info.descender = hhea.descent
 61 |         info.xHeight = os2.sxHeight
 62 |         info.capHeight = os2.sCapHeight
 63 |         # names
 64 |         nameIDs = {}
 65 |         for nameRecord in self.source["name"].names:
 66 |             nameID = nameRecord.nameID
 67 |             platformID = nameRecord.platformID
 68 |             platEncID = nameRecord.platEncID
 69 |             langID = nameRecord.langID
 70 |             nameIDs[nameID, platformID, platEncID, langID] = nameRecord.toUnicode()
 71 |         # to retrieve the family and style names, first start
 72 |         # with the preferred name entries and progress to less
 73 |         # specific entries until something is found.
 74 |         familyPriority = [(16, 1, 0, 0), (16, 1, None, None), (16, None, None, None),
 75 |                         (1, 1, 0, 0), (1, 1, None, None), (1, None, None, None)]
 76 |         familyName = self._skimNameIDs(nameIDs, familyPriority)
 77 |         stylePriority = [(17, 1, 0, 0), (17, 1, None, None), (17, None, None, None),
 78 |                         (2, 1, 0, 0), (2, 1, None, None), (2, None, None, None)]
 79 |         styleName = self._skimNameIDs(nameIDs, stylePriority)
 80 |         if familyName is None or styleName is None:
 81 |             raise CompositorError("Could not extract name data from name table.")
 82 |         self.info.familyName = familyName
 83 |         self.info.styleName = styleName
 84 |         # stylistic set names
 85 |         self.stylisticSetNames = {}
 86 |         if self.gsub:
 87 |             for featureRecord in self.gsub.FeatureList.FeatureRecord:
 88 |                 params = featureRecord.Feature.FeatureParams
 89 |                 if hasattr(params, "UINameID"):
 90 |                     ssNameID = params.UINameID
 91 |                     namePriority = [(ssNameID, 1, 0, 0), (ssNameID, 1, None, None), (ssNameID, 3, 1, 1033), (ssNameID, 3, None, None)]
 92 |                     ssName = self._skimNameIDs(nameIDs, namePriority)
 93 |                     if ssName:
 94 |                         self.stylisticSetNames[featureRecord.FeatureTag] = ssName
 95 | 
 96 |     def _skimNameIDs(self, nameIDs, priority):
 97 |         for (nameID, platformID, platEncID, langID) in priority:
 98 |             for (nID, pID, pEID, lID), text in nameIDs.items():
 99 |                 if nID != nameID:
100 |                     continue
101 |                 if pID != platformID and platformID is not None:
102 |                     continue
103 |                 if pEID != platEncID and platEncID is not None:
104 |                     continue
105 |                 if lID != langID and langID is not None:
106 |                     continue
107 |                 return text
108 | 
109 |     def loadFeatures(self):
110 |         gdef = None
111 |         if "GDEF" in self.source:
112 |             gdef = self.source["GDEF"]
113 |         gsub = None
114 |         if "GSUB" in self.source:
115 |             gsub = self.source["GSUB"]
116 |         gpos = None
117 |         if "GPOS" in self.source:
118 |             gpos = self.source["GPOS"]
119 |         self.setFeatureTables(gdef, gsub, gpos)
120 | 
121 |     # -------------
122 |     # dict behavior
123 |     # -------------
124 | 
125 |     def keys(self):
126 |         return self.glyphSet.keys()
127 | 
128 |     def __contains__(self, name):
129 |         return name in self.glyphSet
130 | 
131 |     def __getitem__(self, name):
132 |         if name not in self._glyphs:
133 |             if name not in self.glyphSet:
134 |                 name = self.fallbackGlyph
135 |             glyph = self.glyphSet[name]
136 |             index = self._glyphOrder[name]
137 |             glyph = self.glyphClass(name, index, glyph, self)
138 |             self._glyphs[name] = glyph
139 |         return self._glyphs[name]
140 | 
141 |     # -----------------
142 |     # string processing
143 |     # -----------------
144 | 
145 |     def stringToGlyphNames(self, string):
146 |         glyphNames = []
147 |         for c in string:
148 |             c = tostr(c)
149 |             v = ord(c)
150 |             if v in self.cmap:
151 |                 glyphNames.append(self.cmap[v])
152 |             elif self.fallbackGlyph is not None:
153 |                 glyphNames.append(self.fallbackGlyph)
154 |         return glyphNames
155 | 
156 |     def stringToGlyphRecords(self, string):
157 |         return [GlyphRecord(glyphName) for glyphName in self.stringToGlyphNames(string)]
158 | 
159 |     def didProcessingGSUB(self, glyphRecords):
160 |         for glyphRecord in glyphRecords:
161 |             glyphRecord.advanceWidth += self[glyphRecord.glyphName].width
162 | 
163 |     # -------------
164 |     # Miscellaneous
165 |     # -------------
166 | 
167 |     def getGlyphOrder(self):
168 |         return self.source.getGlyphOrder()
169 | 
170 | 
class Info(object):
    """Plain attribute container. It defines no attributes of its own."""
172 | 
173 | 
class Glyph(object):

    """
    Wrapper around a glyph of the font's glyph set.

    - name
    - index
    - width
      Copied from the source glyph.
    - source
      The wrapped glyph.
    - font
      A weak reference to the font. Call it to get the font.
    - bounds
      The bounds measured by fontTools' BoundsPen.
    """

    def __init__(self, name, index, source, font):
        widthIsLoaded = hasattr(source, "width")
        if not widthIsLoaded:
            # the char string must be loaded by drawing it
            loader = _GlyphLoadPen()
            source.draw(loader)
        self.name = name
        self.source = source
        self.width = source.width
        self.font = weakref.ref(font)
        self.index = index

    def draw(self, pen):
        """
        Draw the source glyph with pen.
        """
        self.source.draw(pen)

    def _get_bounds(self):
        from fontTools.pens.boundsPen import BoundsPen
        # the pen is given the font as its glyph set
        font = self.font()
        boundsPen = BoundsPen(font)
        self.draw(boundsPen)
        return boundsPen.bounds

    bounds = property(_get_bounds)
196 | 
197 | 
class _GlyphLoadPen(AbstractPen):

    """
    A pen that ignores everything drawn into it. Glyph uses
    it to draw source glyphs that have no width attribute yet.
    """

    def __init__(self):
        """Nothing to set up."""

    def moveTo(self, pt):
        """Ignored."""

    def lineTo(self, pt):
        """Ignored."""

    def curveTo(self, *points):
        """Ignored."""

    def qCurveTo(self, *points):
        """Ignored."""

    def addComponent(self, glyphName, transformation):
        """Ignored."""

    def closePath(self):
        """Ignored."""

    def endPath(self):
        """Ignored."""
223 | 


--------------------------------------------------------------------------------
/Lib/compositor/glyphRecord.py:
--------------------------------------------------------------------------------
  1 | class GlyphRecord(object):
  2 | 
  3 |     """
  4 |     GlyphRecord object.
  5 | 
  6 |     This is the object type which will be contained in the list
  7 |     returned by font.process("A String").
  8 | 
  9 |     This object should NOT be constructed outside of a
 10 |     Compositor context.
 11 | 
 12 |     This object contains the following attributes:
 13 |     - glyphName
 14 |       The glyph name.
 15 |     - xPlacement
 16 |     - yPlacement
 17 |     - xAdvance
 18 |     - yAdvance
 19 |       The numerical values that control the placement
 20 |       and advance of the glyph. For more information
 21 |       on these, check the ValueRecord specification
 22 |       here (scroll way down the page):
 23 |       http://www.microsoft.com/typography/otspec/gpos.htm
 24 |     - alternates
 25 |       This is a list containing alternates for the glyph
 26 |       referenced by this glyph record. During processing
 27 |       by the tables in the engine, this list of will be
 28 |       mutated and obliterated n number of times based on
 29 |       the features and lookups being processed. There is no
 30 |       guarantee that the alternates listed here will
 31 |       reference the final glyph contained in the record.
 32 |       Therefore, this validation is up to the caller.
 33 |       Also, the internal processing will populate this
 34 |       list with glyph names.
 35 |       Note: You do not need to worry about any of the
 36 |       validation or population issues discussed here
 37 |       if you are using the Font object. That
 38 |       object handles all of the necessary cleanup in
 39 |       the process method.
 40 |     - ligatureComponents
 41 |       This is a list of glyph names that are the
 42 |       components of a ligature.
 43 | 
 44 |     This object contains three methods for making educated
 45 |     guesses about Unicode values. This is necessary when
 46 |     word breaks are determined.
 47 |     - saveState
 48 |       This method saves the glyph name provided, which
 49 |       can either be a glyph name or a list of glyph names
 50 |       in the case of lgatures. This will add the glyph name
 51 |       to the record's substitution history. This should be
 52 |       done before a substitution is made.
 53 |     - getSide1GlyphNameWithUnicodeValue
 54 |     - getSide2GlyphNameWithUnicodeValue
 55 |       These two methods find the most recent glyph name
 56 |       for each side that has a Unicode value. When called,
 57 |       they work backwards through the glyph names saved with
 58 |       the saveState method until a glyph name with a Unicode
 59 |       value is found.
 60 |     """
 61 | 
 62 |     __slots__ = ["glyph", "glyphName", "xPlacement", "yPlacement",
 63 |                 "xAdvance", "yAdvance", "advanceWidth", "advanceHeight",
 64 |                 "alternates", "_alternatesReference",
 65 |                 "_ligatureComponents", "_ligatureComponentsReference",
 66 |                 "_substitutionHistory"]
 67 | 
 68 |     def __init__(self, glyphName):
 69 |         self.glyph = None
 70 |         self.glyphName = glyphName
 71 |         self.xPlacement = 0
 72 |         self.yPlacement = 0
 73 |         self.xAdvance = 0
 74 |         self.yAdvance = 0
 75 |         self.advanceWidth = 0
 76 |         self.advanceHeight = 0
 77 |         self.alternates = []
 78 |         self._alternatesReference = None
 79 |         self._ligatureComponents = []
 80 |         self._substitutionHistory = []
 81 | 
 82 |     def __repr__(self):
 83 |         name = str(self.glyphName)
 84 |         xP = str(self.xPlacement)
 85 |         yP = str(self.yPlacement)
 86 |         xA = str(self.xAdvance)
 87 |         yA = str(self.yAdvance)
 88 |         s = "<GlyphRecord: Name: %s XPlacement: %s YPlacement: %s XAdvance: %s YAdvance: %s>" % (name, xP, yP, xA, yA)
 89 |         return s
 90 | 
 91 |     def __add__(self, valueRecord):
 92 |         self.xPlacement += valueRecord.XPlacement
 93 |         self.yPlacement += valueRecord.YPlacement
 94 |         self.xAdvance += valueRecord.XAdvance
 95 |         self.yAdvance += valueRecord.YAdvance
 96 |         return self
 97 | 
 98 |     def _get_ligatureComponents(self):
 99 |         return list(self._ligatureComponents)
100 | 
101 |     def _set_ligatureComponents(self, components):
102 |         self._ligatureComponents = list(components)
103 | 
104 |     ligatureComponents = property(_get_ligatureComponents, _set_ligatureComponents)
105 | 
106 |     def saveState(self, glyphName):
107 |         if isinstance(glyphName, list):
108 |             glyphName = list(glyphName)
109 |         self._substitutionHistory.append(glyphName)
110 | 
111 |     def getSide1GlyphNameWithUnicodeValue(self, reversedCMAP):
112 |         if self.glyphName in reversedCMAP:
113 |             return self.glyphName
114 |         for glyphName in reversed(self._substitutionHistory):
115 |             if isinstance(glyphName, list):
116 |                 glyphName = glyphName[0]
117 |             if glyphName in reversedCMAP:
118 |                 return glyphName
119 |         return None
120 | 
121 |     def getSide2GlyphNameWithUnicodeValue(self, reversedCMAP):
122 |         if self.glyphName in reversedCMAP:
123 |             return self.glyphName
124 |         for glyphName in reversed(self._substitutionHistory):
125 |             if isinstance(glyphName, list):
126 |                 glyphName = glyphName[-1]
127 |             if glyphName in reversedCMAP:
128 |                 return glyphName
129 |         return None
130 | 
131 | 
def glyphNamesToGlyphRecords(glyphList):
    """
    Wrap every glyph name of glyphList in a new GlyphRecord.

    >>> glyphList = ["a", "b"]
    >>> glyphNamesToGlyphRecords(glyphList)
    [<GlyphRecord: Name: a XPlacement: 0 YPlacement: 0 XAdvance: 0 YAdvance: 0>, <GlyphRecord: Name: b XPlacement: 0 YPlacement: 0 XAdvance: 0 YAdvance: 0>]
    """
    records = []
    for name in glyphList:
        records.append(GlyphRecord(name))
    return records
139 | 
def glyphRecordsToTuples(glyphRecords):
    """
    Flatten every record into a
    (glyphName, xPlacement, yPlacement, xAdvance, yAdvance) tuple.

    >>> vr = GlyphRecord("foo")
    >>> vr.xPlacement = 1
    >>> vr.yPlacement = 2
    >>> vr.xAdvance = 3
    >>> vr.yAdvance = 4
    >>> glyphRecordsToTuples([vr])
    [('foo', 1, 2, 3, 4)]
    """
    return [
        (r.glyphName, r.xPlacement, r.yPlacement, r.xAdvance, r.yAdvance)
        for r in glyphRecords
    ]
159 | 
def glyphRecordsToGlyphNames(glyphRecords):
    """
    Return the glyph names of the records, in order.

    >>> glyphList = ["a", "b"]
    >>> glyphRecords = glyphNamesToGlyphRecords(glyphList)
    >>> glyphRecordsToGlyphNames(glyphRecords)
    ['a', 'b']
    """
    names = []
    for glyphRecord in glyphRecords:
        names.append(glyphRecord.glyphName)
    return names
168 | 
# Doctest only: adding a value record to a glyph record sums
# the placement and advance values into the glyph record.
# NOTE(review): the import in the doctest has no "compositor."
# prefix, unlike the other imports of this package -- confirm
# that it resolves when the doctests are run.
def _testMath():
    """
    >>> from subTablesGPOS import ValueRecord
    >>> vr = ValueRecord()
    >>> vr.XPlacement = 1
    >>> vr.YPlacement = 2
    >>> vr.XAdvance = 3
    >>> vr.YAdvance = 4
    >>> gr = GlyphRecord("foo")
    >>> gr.xPlacement = 1
    >>> gr.yPlacement = 2
    >>> gr.xAdvance = 3
    >>> gr.yAdvance = 4
    >>> gr + vr
    <GlyphRecord: Name: foo XPlacement: 2 YPlacement: 4 XAdvance: 6 YAdvance: 8>
    """
185 | 
# Doctest only: exercises saveState and the two
# get*GlyphNameWithUnicodeValue methods of GlyphRecord.
# The dict named cmap is keyed by glyph name because the
# methods only test whether a glyph name is in the mapping.
def _testUnicodeGuessing():
    """
    >>> cmap = {
    ... "a" : 97,
    ... "b" : 98,
    ... }
    >>> r = GlyphRecord("a")
    >>> r.saveState("a")
    >>> r.glyphName = "a.alt1"
    >>> r.saveState("a.alt1")
    >>> r.glyphName = "a.alt2"
    >>> r.getSide1GlyphNameWithUnicodeValue(cmap)
    'a'
    >>> r.glyphName = "b"
    >>> r.getSide1GlyphNameWithUnicodeValue(cmap)
    'b'
    >>> r = GlyphRecord("a")
    >>> r.saveState(["a", "b"])
    >>> r.glyphName = "a_b"
    >>> r.getSide1GlyphNameWithUnicodeValue(cmap)
    'a'
    >>> r.getSide2GlyphNameWithUnicodeValue(cmap)
    'b'
    """
210 | 
# run the doctests of this module when it is executed as a script
if __name__ == "__main__":
    import doctest
    doctest.testmod()
214 | 


--------------------------------------------------------------------------------
/Lib/compositor/layoutEngine.py:
--------------------------------------------------------------------------------
  1 | from compositor.tables import GSUB, GPOS, GDEF
  2 | from compositor.glyphRecord import GlyphRecord
  3 | from compositor.cmap import reverseCMAP
  4 | from compositor.textUtilities import convertCase
  5 | from compositor.error import CompositorError
  6 | from fontTools.misc.textTools import tostr
  7 | 
  8 | 
  9 | class LayoutEngine(object):
 10 | 
 11 |     def __init__(self):
 12 |         self.cmap = {}
 13 |         self.reversedCMAP = {}
 14 |         self.gdef = None
 15 |         self.gsub = None
 16 |         self.gpos = None
 17 |         self.fallbackGlyph = ".notdef"
 18 | 
 19 |     # ------------
 20 |     # data setting
 21 |     # ------------
 22 | 
 23 |     def setCMAP(self, cmap):
 24 |         self.cmap = cmap
 25 |         self.reversedCMAP = reverseCMAP(cmap)
 26 |         if self.gsub is not None:
 27 |             self.gsub.setCMAP(self.reversedCMAP)
 28 |         if self.gpos is not None:
 29 |             self.gpos.setCMAP(self.reversedCMAP)
 30 | 
 31 |     def setFeatureTables(self, gdef=None, gsub=None, gpos=None):
 32 |         self.gdef = None
 33 |         if gdef is not None:
 34 |             self.gdef = GDEF().loadFromFontTools(gdef)
 35 |         self.gsub = None
 36 |         if gsub is not None:
 37 |             self.gsub = GSUB().loadFromFontTools(gsub, self.reversedCMAP, self.gdef)
 38 |         self.gpos = None
 39 |         if gpos is not None:
 40 |             self.gpos = GPOS().loadFromFontTools(gpos, self.reversedCMAP, self.gdef)
 41 | 
 42 |     # -----------------
 43 |     # string processing
 44 |     # -----------------
 45 | 
 46 |     def stringToGlyphNames(self, string):
 47 |         glyphNames = []
 48 |         for c in string:
 49 |             c = tostr(c)
 50 |             v = ord(c)
 51 |             if v in self.cmap:
 52 |                 glyphNames.append(self.cmap[v])
 53 |             elif self.fallbackGlyph is not None:
 54 |                 glyphNames.append(self.fallbackGlyph)
 55 |         return glyphNames
 56 | 
 57 |     def stringToGlyphRecords(self, string):
 58 |         return [GlyphRecord(glyphName) for glyphName in self.stringToGlyphNames(string)]
 59 | 
 60 |     def glyphListToGlyphRecords(self, glyphList):
 61 |         glyphRecords = []
 62 |         for glyphName in glyphList:
 63 |             record = GlyphRecord(glyphName)
 64 |             glyphRecords.append(record)
 65 |         return glyphRecords
 66 | 
 67 |     def process(self, stringOrGlyphList, script="latn", langSys=None, rightToLeft=False, case="unchanged", logger=None):
 68 |         if isinstance(stringOrGlyphList, str):
 69 |             stringOrGlyphList = self.stringToGlyphNames(stringOrGlyphList)
 70 |         if case != "unchanged":
 71 |             l = langSys
 72 |             if l is not None:
 73 |                 l = l.strip()
 74 |             stringOrGlyphList = convertCase(case, stringOrGlyphList, self.cmap, self.reversedCMAP, l, self.fallbackGlyph)
 75 |         glyphRecords = self.glyphListToGlyphRecords(stringOrGlyphList)
 76 |         if rightToLeft:
 77 |             glyphRecords.reverse()
 78 |         if logger:
 79 |             logger.logStart()
 80 |             glyphNames = [r.glyphName for r in glyphRecords]
 81 |             logger.logMainSettings(glyphNames, script, langSys)
 82 |         self.willBeginProcessingGSUB(glyphRecords)
 83 |         if self.gsub is not None:
 84 | 
 85 |             if logger:
 86 |                 logger.logTableStart(self.gsub)
 87 |             glyphRecords = self.gsub.process(glyphRecords, script=script, langSys=langSys, logger=logger)
 88 |             if logger:
 89 |                 logger.logResults(glyphRecords)
 90 |                 logger.logTableEnd()
 91 | 
 92 |         self.didProcessingGSUB(glyphRecords)
 93 |         self.willBeginProcessingGPOS(glyphRecords)
 94 |         if self.gpos is not None:
 95 | 
 96 |             if logger:
 97 |                 logger.logTableStart(self.gpos)
 98 |             glyphRecords = self.gpos.process(glyphRecords, script=script, langSys=langSys, logger=logger)
 99 |             if logger:
100 |                 logger.logResults(glyphRecords)
101 |                 logger.logTableEnd()
102 |         self.didProcessingGPOS(glyphRecords)
103 |         if logger:
104 |             logger.logEnd()
105 |         return glyphRecords
106 | 
107 |     def willBeginProcessingGSUB(self, glyphRecords):
108 |         pass
109 | 
110 |     def didProcessingGSUB(self, glyphRecords):
111 |         pass
112 | 
113 |     def willBeginProcessingGPOS(self, glyphRecords):
114 |         pass
115 | 
116 |     def didProcessingGPOS(self, glyphRecords):
117 |         pass
118 | 
119 |     # ------------------
120 |     # feature management
121 |     # ------------------
122 | 
123 |     def getScriptList(self):
124 |         gsub = []
125 |         gpos = []
126 |         if self.gsub is not None:
127 |             gsub = self.gsub.getScriptList()
128 |         if self.gpos is not None:
129 |             gpos = self.gpos.getScriptList()
130 |         return sorted(set(gsub + gpos))
131 | 
132 |     def getLanguageList(self):
133 |         gsub = []
134 |         gpos = []
135 |         if self.gsub is not None:
136 |             gsub = self.gsub.getLanguageList()
137 |         if self.gpos is not None:
138 |             gpos = self.gpos.getLanguageList()
139 |         return sorted(set(gsub + gpos))
140 | 
141 |     def getFeatureList(self):
142 |         gsub = []
143 |         gpos = []
144 |         if self.gsub is not None:
145 |             gsub = self.gsub.getFeatureList()
146 |         if self.gpos is not None:
147 |             gpos = self.gpos.getFeatureList()
148 |         return sorted(set(gsub + gpos))
149 | 
150 |     def getFeatureState(self, featureTag):
151 |         gsubState = None
152 |         gposState = None
153 |         if self.gsub is not None:
154 |             if featureTag in self.gsub:
155 |                 gsubState = self.gsub.getFeatureState(featureTag)
156 |         if self.gpos is not None:
157 |             if featureTag in self.gpos:
158 |                 gposState = self.gpos.getFeatureState(featureTag)
159 |         if gsubState is not None and gposState is not None:
160 |             if gsubState != gposState:
161 |                 raise CompositorError("Inconsistently applied feature: %s" % featureTag)
162 |         if gsubState is not None:
163 |             return gsubState
164 |         if gposState is not None:
165 |             return gposState
166 |         raise CompositorError("Feature %s is is not contained in GSUB or GPOS" % featureTag)
167 | 
168 |     def setFeatureState(self, featureTag, state):
169 |         if self.gsub is not None:
170 |             if featureTag in self.gsub:
171 |                 self.gsub.setFeatureState(featureTag, state)
172 |         if self.gpos is not None:
173 |             if featureTag in self.gpos:
174 |                 self.gpos.setFeatureState(featureTag, state)
175 | 


--------------------------------------------------------------------------------
/Lib/compositor/logger.py:
--------------------------------------------------------------------------------
  1 | """
  2 | A simple logging object. It reports, with
  3 | the help of the Compositor objects, a
  4 | wide range of data about the processing
  5 | of a string of text for a font.
  6 | 
  7 | Usage:
  8 | 
  9 |     logger = Logger()
 10 |     logger.logStart()
 11 |     font = Font("/path/to/a/font.otf")
 12 |     font.process("Hello World!", logger=logger)
 13 |     logger.logEnd()
 14 |     report = logger.getText()
 15 | 
 16 | The returned log is in XML format.
 17 | """
 18 | from io import StringIO
 19 | from fontTools.misc.xmlWriter import XMLWriter
 20 | 
 21 | 
 22 | class Logger(object):
 23 | 
 24 |     def __init__(self):
 25 |         self._file = StringIO()
 26 |         self._writer = XMLWriter(self._file, encoding="utf-8")
 27 | 
 28 |     def __del__(self):
 29 |         self._writer = None
 30 |         self._file.close()
 31 | 
 32 |     def logStart(self):
 33 |         self._writer.begintag("xml")
 34 | 
 35 |     def logEnd(self):
 36 |         self._writer.endtag("xml")
 37 | 
 38 |     def logMainSettings(self, glyphNames, script, langSys):
 39 |         self._writer.begintag("initialSettings")
 40 |         self._writer.newline()
 41 |         self._writer.simpletag("string", value=" ".join(glyphNames))
 42 |         self._writer.newline()
 43 |         self._writer.simpletag("script", value=script)
 44 |         self._writer.newline()
 45 |         self._writer.simpletag("langSys", value=langSys)
 46 |         self._writer.newline()
 47 |         self._writer.endtag("initialSettings")
 48 |         self._writer.newline()
 49 | 
 50 |     def logTableStart(self, table):
 51 |         name = table.__class__.__name__
 52 |         self._writer.begintag("table", name=name)
 53 |         self._writer.newline()
 54 |         self.logTableFeatureStates(table)
 55 | 
 56 |     def logTableEnd(self):
 57 |         self._writer.endtag("table")
 58 | 
 59 |     def logTableFeatureStates(self, table):
 60 |         self._writer.begintag("featureStates")
 61 |         self._writer.newline()
 62 |         for tag in sorted(table.getFeatureList()):
 63 |             state = table.getFeatureState(tag)
 64 |             self._writer.simpletag("feature", name=tag, state=int(state))
 65 |             self._writer.newline()
 66 |         self._writer.endtag("featureStates")
 67 |         self._writer.newline()
 68 | 
 69 |     def logApplicableLookups(self, table, lookups):
 70 |         self._writer.begintag("applicableLookups")
 71 |         self._writer.newline()
 72 |         if lookups:
 73 |             order = []
 74 |             last = None
 75 |             for tag, lookup in lookups:
 76 |                 if tag != last:
 77 |                     if order:
 78 |                         self._logLookupList(last, order)
 79 |                     order = []
 80 |                     last = tag
 81 |                 index = table.LookupList.Lookup.index(lookup)
 82 |                 order.append(index)
 83 |             self._logLookupList(last, order)
 84 |         self._writer.endtag("applicableLookups")
 85 |         self._writer.newline()
 86 | 
 87 |     def _logLookupList(self, tag, lookups):
 88 |         lookups = " ".join([str(i) for i in lookups])
 89 |         self._writer.simpletag("lookups", feature=tag, indices=lookups)
 90 |         self._writer.newline()
 91 | 
 92 |     def logProcessingStart(self):
 93 |         self._writer.begintag("processing")
 94 |         self._writer.newline()
 95 | 
 96 |     def logProcessingEnd(self):
 97 |         self._writer.endtag("processing")
 98 |         self._writer.newline()
 99 | 
100 |     def logLookupStart(self, table, tag, lookup):
101 |         index = table.LookupList.Lookup.index(lookup)
102 |         self._writer.begintag("lookup", feature=tag, index=index)
103 |         self._writer.newline()
104 | 
105 |     def logLookupEnd(self):
106 |         self._writer.endtag("lookup")
107 |         self._writer.newline()
108 | 
109 |     def logSubTableStart(self, lookup, subtable):
110 |         index = lookup.SubTable.index(subtable)
111 |         lookupType = subtable.__class__.__name__
112 |         self._writer.begintag("subTable", index=index, type=lookupType)
113 |         self._writer.newline()
114 | 
115 |     def logSubTableEnd(self):
116 |         self._writer.endtag("subTable")
117 |         self._writer.newline()
118 | 
119 |     def logGlyphRecords(self, glyphRecords):
120 |         for r in glyphRecords:
121 |             self._writer.simpletag("glyphRecord", name=r.glyphName,
122 |                 xPlacement=r.xPlacement, yPlacement=r.yPlacement,
123 |                 xAdvance=r.xAdvance, yAdvance=r.yAdvance)
124 |             self._writer.newline()
125 | 
126 |     def logInput(self, processed, unprocessed):
127 |         self._writer.begintag("input")
128 |         self._writer.newline()
129 |         self._writer.begintag("processed")
130 |         self._writer.newline()
131 |         self.logGlyphRecords(processed)
132 |         self._writer.endtag("processed")
133 |         self._writer.newline()
134 |         self._writer.begintag("unprocessed")
135 |         self._writer.newline()
136 |         self.logGlyphRecords(unprocessed)
137 |         self._writer.endtag("unprocessed")
138 |         self._writer.newline()
139 |         self._writer.endtag("input")
140 |         self._writer.newline()
141 | 
142 |     def logOutput(self, processed, unprocessed):
143 |         self._writer.begintag("output")
144 |         self._writer.newline()
145 |         self._writer.begintag("processed")
146 |         self._writer.newline()
147 |         self.logGlyphRecords(processed)
148 |         self._writer.endtag("processed")
149 |         self._writer.newline()
150 |         self._writer.begintag("unprocessed")
151 |         self._writer.newline()
152 |         self.logGlyphRecords(unprocessed)
153 |         self._writer.endtag("unprocessed")
154 |         self._writer.newline()
155 |         self._writer.endtag("output")
156 |         self._writer.newline()
157 | 
158 |     def logResults(self, processed):
159 |         self._writer.begintag("results")
160 |         self._writer.newline()
161 |         self.logGlyphRecords(processed)
162 |         self._writer.endtag("results")
163 |         self._writer.newline()
164 | 
165 |     def getText(self):
166 |         return self._file.getvalue()
167 | 


--------------------------------------------------------------------------------
/Lib/compositor/lookupList.py:
--------------------------------------------------------------------------------
  1 | """
  2 | GSUB and GPOS LookupList objects (and friends).
  3 | """
  4 | 
  5 | 
  6 | import weakref
  7 | from compositor.subTablesGSUB import *
  8 | from compositor.subTablesGPOS import *
  9 | 
 10 | 
 11 | # ------------
 12 | # Base Classes
 13 | # ------------
 14 | 
 15 | 
 16 | class BaseLookupList(object):
 17 | 
 18 |     __slots__ = ["LookupCount", "Lookup", "__weakref__"]
 19 |     _LookupClass = None
 20 | 
 21 |     def __init__(self):
 22 |         self.LookupCount = 0
 23 |         self.Lookup = []
 24 | 
 25 |     def loadFromFontTools(self, lookupList, gdef):
 26 |         self.LookupCount = lookupList.LookupCount
 27 |         self.Lookup = [self._LookupClass().loadFromFontTools(lookup, self, gdef) for lookup in lookupList.Lookup]
 28 |         return self
 29 | 
 30 | 
 31 | class BaseLookup(object):
 32 | 
 33 |     __slots__ = ["LookupType", "LookupFlag", "SubTableCount", "SubTable",
 34 |                 "_lookupList", "_gdefReference", "__weakref__"]
 35 | 
 36 |     def __init__(self):
 37 |         self._lookupList = None
 38 |         self._gdefReference = None
 39 |         self.LookupType = None
 40 |         self.LookupFlag = None
 41 |         self.SubTableCount = 0
 42 |         self.SubTable = []
 43 | 
 44 |     def loadFromFontTools(self, lookup, lookupList, gdef):
 45 |         self._lookupList = weakref.ref(lookupList)
 46 |         if gdef is not None:
 47 |             gdef = weakref.ref(gdef)
 48 |         self._gdefReference = gdef
 49 |         self.LookupType = lookup.LookupType
 50 |         self.LookupFlag = LookupFlag().loadFromFontTools(lookup.LookupFlag, gdef)
 51 |         self.SubTableCount = lookup.SubTableCount
 52 |         self.SubTable = []
 53 |         for subtable in lookup.SubTable:
 54 |             format = None
 55 |             if hasattr(subtable, "Format"):
 56 |                 format = subtable.Format
 57 |             cls = self._lookupSubTableClass(format)
 58 |             obj = cls().loadFromFontTools(subtable, self)
 59 |             self.SubTable.append(obj)
 60 |         return self
 61 | 
 62 |     def _get_gdef(self):
 63 |         if self._gdefReference is not None:
 64 |             return self._gdefReference()
 65 |         return None
 66 | 
 67 |     _gdef = property(_get_gdef)
 68 | 
 69 | 
 70 | class LookupFlag(object):
 71 | 
 72 |     __slots__ = ["_gdef", "_flag"]
 73 | 
 74 |     def __init__(self):
 75 |         self._gdef = None
 76 |         self._flag = None
 77 | 
 78 |     def loadFromFontTools(self, lookupFlag, gdef):
 79 |         self._gdef = gdef
 80 |         self._flag = lookupFlag
 81 |         return self
 82 | 
 83 |     def _get_haveIgnore(self):
 84 |         return bool(self._flag & 0x0E)
 85 | 
 86 |     _haveIgnore = property(_get_haveIgnore)
 87 | 
 88 |     def _get_RightToLeft(self):
 89 |         return bool(self._flag & 0x0001)
 90 | 
 91 |     RightToLeft = property(_get_RightToLeft)
 92 | 
 93 |     def _get_IgnoreBaseGlyphs(self):
 94 |         return bool(self._flag & 0x0002)
 95 | 
 96 |     IgnoreBaseGlyphs = property(_get_IgnoreBaseGlyphs)
 97 | 
 98 |     def _get_IgnoreLigatures(self):
 99 |         return bool(self._flag & 0x0004)
100 | 
101 |     IgnoreLigatures = property(_get_IgnoreLigatures)
102 | 
103 |     def _get_IgnoreMarks(self):
104 |         return bool(self._flag & 0x0008)
105 | 
106 |     IgnoreMarks = property(_get_IgnoreMarks)
107 | 
108 |     def _get_MarkAttachmentType(self):
109 |         return bool(self._flag & 0xFF00)
110 | 
111 |     MarkAttachmentType = property(_get_MarkAttachmentType)
112 | 
113 |     def coversGlyph(self, glyphName):
114 |         gdef = self._gdef
115 |         if gdef is None:
116 |             return False
117 |         gdef = gdef()
118 |         cls = gdef.GlyphClassDef[glyphName]
119 |         if cls == 0:
120 |             return False
121 |         if self._haveIgnore:
122 |             if cls == 1 and self.IgnoreBaseGlyphs: #IgnoreBaseGlyphs
123 |                 return True
124 |             if cls == 2 and self.IgnoreLigatures: #IgnoreLigatures
125 |                 return True
126 |             if cls == 3 and self.IgnoreMarks: #IgnoreMarks
127 |                 return True
128 |         if self.MarkAttachmentType and cls == 3:
129 |             if gdef.MarkAttachClassDef is None:
130 |                 return False
131 |             markClass = gdef.MarkAttachClassDef[glyphName]
132 |             if (self._flag & 0xff00) >> 8 != markClass:
133 |                 return True
134 |         return False
135 | 
136 | # ----
137 | # GSUB
138 | # ----
139 | 
140 | 
class GSUBLookup(BaseLookup):

    """
    A GSUB lookup. It maps the lookup type and, where
    needed, the subtable format to the class that is
    used to load a subtable.
    """

    __slots__ = []

    def _lookupSubTableClass(self, subtableFormat):
        """
        Return the class used to load a subtable of this lookup.

        subtableFormat is the Format value of the fontTools
        subtable, or None if the subtable has none. It is only
        consulted for the lookup types 5 and 6.

        A CompositorError is raised for an unknown lookup type
        and for a format that the lookup type does not define.
        """
        lookupType = self.LookupType
        cls = None
        formats = None
        if lookupType == 1:
            # the format 2 class is used for every type 1
            # subtable, whatever format it reports.
            cls = GSUBLookupType1Format2
        elif lookupType == 2:
            cls = GSUBLookupType2
        elif lookupType == 3:
            cls = GSUBLookupType3
        elif lookupType == 4:
            cls = GSUBLookupType4
        elif lookupType == 5:
            formats = (GSUBLookupType5Format1, GSUBLookupType5Format2, GSUBLookupType5Format3)
        elif lookupType == 6:
            formats = (GSUBLookupType6Format1, GSUBLookupType6Format2, GSUBLookupType6Format3)
        elif lookupType == 7:
            cls = GSUBLookupType7
        elif lookupType == 8:
            cls = GSUBLookupType8
        else:
            from compositor.error import CompositorError
            raise CompositorError("Unknown GSUB lookup type: %r" % lookupType)
        if formats is not None:
            # validate before indexing: the format 0 would
            # otherwise select the last class through index -1.
            if subtableFormat not in range(1, len(formats) + 1):
                from compositor.error import CompositorError
                raise CompositorError("Unknown format %r for GSUB lookup type %d" % (subtableFormat, lookupType))
            cls = formats[subtableFormat - 1]
        return cls
164 | 
165 | 
class GSUBLookupList(BaseLookupList):

    """
    The lookup list of the GSUB table. Each lookup is
    loaded as a GSUBLookup.
    """

    __slots__ = []
    _LookupClass = GSUBLookup
170 | 
171 | 
172 | # ----
173 | # GPOS
174 | # ----
175 | 
176 | 
class GPOSLookup(BaseLookup):

    """
    A GPOS lookup. It maps the lookup type and, where
    needed, the subtable format to the class that is
    used to load a subtable.
    """

    __slots__ = []

    def _lookupSubTableClass(self, subtableFormat):
        """
        Return the class used to load a subtable of this lookup.

        subtableFormat is the Format value of the fontTools
        subtable, or None if the subtable has none. It is only
        consulted for the lookup types 1, 2, 7 and 8.

        A CompositorError is raised for an unknown lookup type
        and for a format that the lookup type does not define.
        """
        lookupType = self.LookupType
        cls = None
        formats = None
        if lookupType == 1:
            formats = (GPOSLookupType1Format1, GPOSLookupType1Format2)
        elif lookupType == 2:
            formats = (GPOSLookupType2Format1, GPOSLookupType2Format2)
        elif lookupType == 3:
            cls = GPOSLookupType3
        elif lookupType == 4:
            cls = GPOSLookupType4
        elif lookupType == 5:
            cls = GPOSLookupType5
        elif lookupType == 6:
            cls = GPOSLookupType6
        elif lookupType == 7:
            formats = (GPOSLookupType7Format1, GPOSLookupType7Format2, GPOSLookupType7Format3)
        elif lookupType == 8:
            formats = (GPOSLookupType8Format1, GPOSLookupType8Format2, GPOSLookupType8Format3)
        elif lookupType == 9:
            cls = GPOSLookupType9
        else:
            from compositor.error import CompositorError
            raise CompositorError("Unknown GPOS lookup type: %r" % lookupType)
        if formats is not None:
            # validate before indexing: the format 0 would
            # otherwise select the last class through index -1.
            if subtableFormat not in range(1, len(formats) + 1):
                from compositor.error import CompositorError
                raise CompositorError("Unknown format %r for GPOS lookup type %d" % (subtableFormat, lookupType))
            cls = formats[subtableFormat - 1]
        return cls
202 | 
203 | 
class GPOSLookupList(BaseLookupList):

    """
    The lookup list of the GPOS table. Each lookup is
    loaded as a GPOSLookup.
    """

    __slots__ = []
    _LookupClass = GPOSLookup
208 | 


--------------------------------------------------------------------------------
/Lib/compositor/scriptList.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ScriptList object (and friends).
 3 | """
 4 | 
# The names made available by "from compositor.scriptList import *".
# NOTE(review): Script and LangSys are defined in this module
# but are not listed here -- confirm that this is intended.
__all__ = ["ScriptList", "ScriptRecord", "LangSysRecord"]
 6 | 
 7 | class ScriptList(object):
 8 | 
 9 |     __slots__ = ["ScriptCount", "ScriptRecord"]
10 | 
11 |     def __init__(self):
12 |         self.ScriptCount = 0
13 |         self.ScriptRecord = None
14 | 
15 |     def loadFromFontTools(self, scriptList):
16 |         self.ScriptCount = scriptList.ScriptCount
17 |         self.ScriptRecord = [ScriptRecord().loadFromFontTools(record) for record in scriptList.ScriptRecord]
18 |         return self
19 | 
20 | 
class ScriptRecord(object):

    """
    A script tag together with the Script object that
    the tag identifies.
    """

    __slots__ = ["ScriptTag", "Script"]

    def __init__(self):
        self.ScriptTag = self.Script = None

    def loadFromFontTools(self, scriptRecord):
        """
        Populate the object from a fontTools ScriptRecord and
        return self.
        """
        self.ScriptTag = scriptRecord.ScriptTag
        script = Script()
        self.Script = script.loadFromFontTools(scriptRecord.Script)
        return self
33 | 
34 | 
class Script(object):

    """
    A script: an optional default language system and
    the records of the language systems of the script.
    """

    __slots__ = ["DefaultLangSys", "LangSysCount", "LangSysRecord"]

    def __init__(self):
        self.DefaultLangSys = None
        self.LangSysCount = 0
        self.LangSysRecord = []

    def loadFromFontTools(self, script):
        """
        Populate the object from a fontTools Script and return
        self. DefaultLangSys stays None if the script has none.
        """
        default = script.DefaultLangSys
        self.DefaultLangSys = None if default is None else LangSys().loadFromFontTools(default)
        self.LangSysCount = script.LangSysCount
        records = []
        for record in script.LangSysRecord:
            records.append(LangSysRecord().loadFromFontTools(record))
        self.LangSysRecord = records
        return self
51 | 
52 | 
class LangSysRecord(object):

    """
    A language system tag together with the LangSys
    object that the tag identifies.
    """

    __slots__ = ["LangSysTag", "LangSys"]

    def __init__(self):
        self.LangSysTag = self.LangSys = None

    def loadFromFontTools(self, langSysRecord):
        """
        Populate the object from a fontTools LangSysRecord and
        return self.
        """
        self.LangSysTag = langSysRecord.LangSysTag
        langSys = LangSys()
        self.LangSys = langSys.loadFromFontTools(langSysRecord.LangSys)
        return self
65 | 
66 | 
class LangSys(object):

    """
    A language system: the values of a fontTools LangSys
    object, with the feature indexes copied into a new list.
    """

    __slots__ = ["LookupOrder", "ReqFeatureIndex", "FeatureCount", "FeatureIndex"]

    def __init__(self):
        self.LookupOrder = None
        self.ReqFeatureIndex = None
        self.FeatureCount = 0
        self.FeatureIndex = []

    def loadFromFontTools(self, langSys):
        """
        Populate the object from a fontTools LangSys and return
        self.
        """
        # LookupOrder is copied as is. (XXX?)
        for name in ("LookupOrder", "ReqFeatureIndex", "FeatureCount"):
            setattr(self, name, getattr(langSys, name))
        self.FeatureIndex = [index for index in langSys.FeatureIndex]
        return self
83 | 


--------------------------------------------------------------------------------
/Lib/compositor/subTablesBase.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import weakref
  3 | 
  4 | # ------------
  5 | # Base Classes
  6 | # ------------
  7 | 
  8 | 
  9 | class BaseSubTable(object):
 10 | 
 11 |     """
 12 |     This object implents the base level subtable behavior
 13 |     for GSUB and GPOS subtables. It establishes one private
 14 |     attribute, _lookup, which is a weak reference to the
 15 |     lookup that contains the subtable.
 16 |     """
 17 | 
 18 |     __slots__ = ["_lookup"]
 19 | 
 20 |     def __init__(self):
 21 |         self._lookup = None
 22 | 
 23 |     def loadFromFontTools(self, subtable, lookup):
 24 |         self._lookup = weakref.ref(lookup)
 25 |         return self
 26 | 
 27 |     def process(self, processed, glyphRecords, featureTag):
 28 |         if self._lookup is not None and hasattr(self._lookup(), "LookupType"):
 29 |             lookupType = self._lookup().LookupType
 30 |         else:
 31 |             lookupType = "Unknown"
 32 |         if hasattr(self, "SubstFormat"):
 33 |             format = str(self.SubstFormat)
 34 |         elif hasattr(self, "PosFormat"):
 35 |             format = str(self.PosFormat)
 36 |         else:
 37 |             format = "Unknown"
 38 |         className = self.__class__.__name__
 39 |         print("[Compositor] %s skipping Lookup Type %s Format %s" % (className, lookupType, format))
 40 |         return processed, glyphRecords, False
 41 | 
 42 |     def _lookupFlagCoversGlyph(self, glyphName):
 43 |         return self._lookup().LookupFlag.coversGlyph(glyphName)
 44 | 
 45 |     def _nextRecord(self, glyphRecords):
 46 |         nextRecord = None
 47 |         nextRecordIndex = 0
 48 |         while nextRecord is None:
 49 |             for _nextRecord in glyphRecords:
 50 |                 _nextGlyph = _nextRecord.glyphName
 51 |                 if not self._lookupFlagCoversGlyph(_nextGlyph):
 52 |                     nextRecord = _nextRecord
 53 |                     break
 54 |                 nextRecordIndex += 1
 55 |             break
 56 |         return nextRecord, nextRecordIndex
 57 | 
 58 | 
class BaseContextSubTable(BaseSubTable):

    """
    Base class of the contextual subtables. It provides
    _processMatch, which applies the actions of a rule
    to the glyph records that the rule matched.
    """

    __slots__ = []

    def _processMatch(self, rule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag):
            """
            Apply the actions of rule to the first inputGlyphCount
            records of glyphRecords.

            rule must provide _ActionCount and _ActionLookupRecord.
            matchedIndexes is indexed with the SequenceIndex of
            each action record to find the position, within the
            first inputGlyphCount records, at which the referenced
            lookup is applied.

            Returns processed, glyphRecords and performedAction.
            processed is extended in place with the first
            inputGlyphCount records (as modified by the actions)
            and the returned glyphRecords are the remaining ones.
            """
            performedAction = False
            if not rule._ActionCount:
                # a rule without actions: the matched records are
                # moved to processed unchanged and this is
                # reported as a performed action.
                performedAction = True
                processed.extend(glyphRecords[:inputGlyphCount])
                glyphRecords = glyphRecords[inputGlyphCount:]
            else:
                eligibleRecords = glyphRecords[:inputGlyphCount]
                ineligibleRecords = glyphRecords[inputGlyphCount:]
                for record in rule._ActionLookupRecord:
                    sequenceIndex = record.SequenceIndex
                    matchIndex = matchedIndexes[sequenceIndex]

                    # the records before the position are handed
                    # to the subtable as already processed.
                    backRecords = eligibleRecords[:matchIndex]
                    inputRecords = eligibleRecords[matchIndex:]

                    # the lookup is found through the lookup list
                    # that contains the lookup of this subtable.
                    lookupListIndex = record.LookupListIndex
                    lookup = self._lookup()._lookupList().Lookup[lookupListIndex]

                    # only the first subtable that performs an
                    # action is applied.
                    for subtable in lookup.SubTable:
                        backRecords, inputRecords, performedAction = subtable.process(backRecords, inputRecords, featureTag)
                        if performedAction:
                            break
                    # NOTE(review): performedAction holds the result
                    # of the most recent subtable.process call. the
                    # value that is returned therefore reflects the
                    # last action record that was tried, not whether
                    # any of the records performed an action --
                    # confirm that this is intended.
                    if performedAction:
                        eligibleRecords = backRecords + inputRecords
                # the records are moved to processed even if no
                # action was performed.
                processed.extend(eligibleRecords)
                glyphRecords = ineligibleRecords
            return processed, glyphRecords, performedAction
 91 | 
 92 | 
 93 | class BaseChainingContextSubTable(BaseContextSubTable):
 94 | 
 95 |     __slots__ = []
 96 | 
 97 |     def _testContext(self, testSource, testAgainst, matchCount, additionObjects=None):
 98 |         # this procedure is common across all formats
 99 |         # with the exception of evaluating if a particular
100 |         # glyph matches a position in the context.
101 |         # to handle this, the comparison is evaluated
102 |         # by a _evaluateContextItem method in each
103 |         # subclass. the speed penalty for this is negligible.
104 |         # the aditionalObjects arg will be ignored by
105 |         # all formats except format 2 which needs a ClassDef
106 |         # to perform the comparison.
107 |         completeRun = []
108 |         matchedIndexes = []
109 |         matched = 0
110 |         while matched < matchCount:
111 |             for recordIndex, glyphRecord in enumerate(testSource):
112 |                 completeRun.append(glyphRecord)
113 |                 glyphName = glyphRecord.glyphName
114 |                 if not self._lookupFlagCoversGlyph(glyphName):
115 |                     if not self._evaluateContextItem(glyphName, testAgainst[matched], additionObjects):
116 |                         break
117 |                     matched += 1
118 |                     matchedIndexes.append(recordIndex)
119 |                     if matched == matchCount:
120 |                         break
121 |             break
122 |         return matched == matchCount, completeRun, matchedIndexes
123 | 
124 | 
class BaseContextFormat1SubTable(BaseContextSubTable):

    """
    Contextual format 1: the rules of the rule set of the
    first glyph list the glyph names that must follow it.
    """

    __slots__ = []

    def process(self, processed, glyphRecords, featureTag):
        """
        Try the rules of the rule set of the first glyph, in
        order, until one performs an action. Returns processed,
        glyphRecords and a bool indicating if an action was
        performed.
        """
        performedAction = False
        firstGlyph = glyphRecords[0].glyphName
        if firstGlyph not in self.Coverage or self._lookupFlagCoversGlyph(firstGlyph):
            return processed, glyphRecords, performedAction
        ruleSet = self._RuleSet[self.Coverage.index(firstGlyph)]
        for rule in ruleSet._Rule:
            # the first glyph is matched by the coverage.
            matchedIndexes = [0]
            position = 1
            for expectedGlyph in rule.Input:
                candidate, offset = self._nextRecord(glyphRecords[position:])
                position += offset
                if candidate is None:
                    continue
                if candidate.glyphName != expectedGlyph:
                    break
                matchedIndexes.append(position)
                position += 1
            if len(matchedIndexes) != rule.GlyphCount:
                continue
            # everything up to the last matched glyph is input.
            inputGlyphCount = matchedIndexes[-1] + 1
            processed, glyphRecords, performedAction = self._processMatch(rule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag)
            if performedAction:
                break
        return processed, glyphRecords, performedAction
156 | 
157 | 
class BaseContextFormat2SubTable(BaseContextSubTable):

    """
    Contextual format 2: the rules of the class set of the
    first glyph list the classes, according to ClassDef, of
    the glyphs that must follow it.
    """

    __slots__ = []

    def process(self, processed, glyphRecords, featureTag):
        """
        Try the rules of the class set of the first glyph, in
        order, until one performs an action. Returns processed,
        glyphRecords and a bool indicating if an action was
        performed.
        """
        performedAction = False
        currentRecord = glyphRecords[0]
        currentGlyph = currentRecord.glyphName
        if currentGlyph in self.Coverage:
            if not self._lookupFlagCoversGlyph(currentGlyph):
                classIndex = self.ClassDef[currentGlyph]
                classSet = self._ClassSet[classIndex]
                if classSet is not None:
                    for classRule in classSet._ClassRule:
                        # every rule is tested from scratch. if the
                        # matches of a rule that failed were kept,
                        # the next rule would be tested at the wrong
                        # position and could be reported as matched.
                        matchedIndexes = [0]
                        currentGlyphIndex = 1
                        for inputClass in classRule.Class:
                            glyphRecord, relativeIndex = self._nextRecord(glyphRecords[currentGlyphIndex:])
                            currentGlyphIndex += relativeIndex
                            if glyphRecord is not None:
                                glyphName = glyphRecord.glyphName
                                glyphClass = self.ClassDef[glyphName]
                                if glyphClass != inputClass:
                                    break
                                else:
                                    matchedIndexes.append(currentGlyphIndex)
                                currentGlyphIndex += 1
                        if len(matchedIndexes) == classRule.GlyphCount:
                            inputGlyphCount = matchedIndexes[-1] + 1
                            processed, glyphRecords, performedAction = self._processMatch(classRule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag)
                            if performedAction:
                                # the remaining rules must not be
                                # applied to the records that are left.
                                break
        return processed, glyphRecords, performedAction
189 | 
190 | 
class BaseContextFormat3SubTable(BaseContextSubTable):

    """
    Contextual format 3: every position of the context is
    defined by one of the coverages in Coverage.
    """

    __slots__ = []

    def process(self, processed, glyphRecords, featureTag):
        """
        Test the glyph records against the coverages, in order,
        and apply the actions of the subtable if all of them
        match. Returns processed, glyphRecords and a bool
        indicating if an action was performed.
        """
        performedAction = False
        matchedIndexes = []
        currentGlyphIndex = 0
        for coverage in self.Coverage:
            glyphRecord, relativeIndex = self._nextRecord(glyphRecords[currentGlyphIndex:])
            if glyphRecord is None:
                # there are no more glyphs that can be tested,
                # so the context can not be matched.
                break
            currentGlyphIndex += relativeIndex
            currentGlyph = glyphRecord.glyphName
            if currentGlyph not in coverage:
                break
            matchedIndexes.append(currentGlyphIndex)
            currentGlyphIndex += 1
        # an empty match has no last index to measure the input with.
        if matchedIndexes and len(matchedIndexes) == self.GlyphCount:
            inputGlyphCount = matchedIndexes[-1] + 1
            processed, glyphRecords, performedAction = self._processMatch(self, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag)
        return processed, glyphRecords, performedAction
211 | 
212 | 
class BaseChainingContextFormat1SubTable(BaseChainingContextSubTable):

    """
    Chaining contextual format 1: the rules list the glyph
    names of the backtrack, input and look ahead sequences.
    """

    __slots__ = []

    def process(self, processed, glyphRecords, featureTag):
        """
        Try the rules of the rule set of the first glyph, in
        order, until one performs an action. Returns processed,
        glyphRecords and a bool indicating if an action was
        performed.
        """
        performedAction = False
        currentRecord = glyphRecords[0]
        currentGlyph = currentRecord.glyphName
        if currentGlyph not in self.Coverage:
            return processed, glyphRecords, performedAction
        # a glyph that the lookup flag ignores can not start a
        # match, as in the other contextual formats.
        if self._lookupFlagCoversGlyph(currentGlyph):
            return processed, glyphRecords, performedAction
        # the rule sets are parallel to the coverage. only the
        # rules of the set of the current glyph apply: the first
        # glyph of the input of a rule is implied by its set.
        coverageIndex = self.Coverage.index(currentGlyph)
        chainRuleSet = self._ChainRuleSet[coverageIndex]
        if chainRuleSet is None:
            return processed, glyphRecords, performedAction
        for chainRule in chainRuleSet._ChainRule:
            # backtrack testing
            backtrackCount = chainRule.BacktrackGlyphCount
            if backtrackCount:
                backtrackMatch = self._testContext(reversed(processed), chainRule.Backtrack, backtrackCount)[0]
                if not backtrackMatch:
                    continue
            # input testing. the run starts out empty so that
            # nothing is left over from a previous rule if
            # this rule has no input count.
            inputRun = []
            inputMatchIndexes = []
            inputCount = chainRule.InputGlyphCount
            if inputCount:
                inputMatch, inputRun, inputMatchIndexes = self._testContext(glyphRecords[1:], chainRule.Input, inputCount-1)
                if not inputMatch:
                    continue
            # the current glyph is the first glyph of the input.
            inputRun = [currentRecord] + inputRun
            inputMatchIndexes = [0] + [i + 1 for i in inputMatchIndexes]
            # look ahead testing
            lookAheadCount = chainRule.LookAheadGlyphCount
            if lookAheadCount:
                lookAheadMatch = self._testContext(glyphRecords[len(inputRun):], chainRule.LookAhead, lookAheadCount)[0]
                if not lookAheadMatch:
                    continue
            # match. process.
            processed, glyphRecords, performedAction = self._processMatch(chainRule, processed, glyphRecords, len(inputRun), inputMatchIndexes, featureTag)
            if performedAction:
                break
        return processed, glyphRecords, performedAction

    def _evaluateContextItem(self, glyphName, contextTest, additionalObject):
        # the context items of this format are glyph names.
        return glyphName == contextTest
264 | 
265 | 
class BaseChainingContextFormat2SubTable(BaseChainingContextSubTable):

    """
    Chaining contextual format 2: the rules list the classes
    of the backtrack, input and look ahead sequences. Each
    sequence is tested with its own class definition.
    """

    __slots__ = []

    def process(self, processed, glyphRecords, featureTag):
        """
        Try the rules of the class set of the first glyph, in
        order, until one performs an action. Returns processed,
        glyphRecords and a bool indicating if an action was
        performed.
        """
        performedAction = False
        currentRecord = glyphRecords[0]
        currentGlyph = currentRecord.glyphName
        if currentGlyph not in self.Coverage:
            return processed, glyphRecords, performedAction
        if self._lookupFlagCoversGlyph(currentGlyph):
            return processed, glyphRecords, performedAction
        classIndex = self.InputClassDef[currentGlyph]
        chainClassSet = self._ChainClassSet[classIndex]
        if chainClassSet is None:
            return processed, glyphRecords, performedAction
        for chainClassRule in chainClassSet._ChainClassRule:
            # backtrack testing
            backtrackCount = chainClassRule.BacktrackGlyphCount
            if backtrackCount:
                backtrackMatch = self._testContext(reversed(processed), chainClassRule.Backtrack, backtrackCount, self.BacktrackClassDef)[0]
                if not backtrackMatch:
                    continue
            # input testing. the run starts out empty so that
            # nothing is left over from a previous rule if
            # this rule has no input count.
            inputRun = []
            inputMatchIndexes = []
            inputCount = chainClassRule.InputGlyphCount
            if inputCount:
                inputMatch, inputRun, inputMatchIndexes = self._testContext(glyphRecords[1:], chainClassRule.Input, inputCount-1, self.InputClassDef)
                if not inputMatch:
                    continue
            # the current glyph is the first glyph of the input.
            inputRun = [currentRecord] + inputRun
            inputMatchIndexes = [0] + [i + 1 for i in inputMatchIndexes]
            # look ahead testing
            lookAheadCount = chainClassRule.LookAheadGlyphCount
            if lookAheadCount:
                lookAheadMatch = self._testContext(glyphRecords[len(inputRun):], chainClassRule.LookAhead, lookAheadCount, self.LookAheadClassDef)[0]
                if not lookAheadMatch:
                    continue
            # match. process.
            processed, glyphRecords, performedAction = self._processMatch(chainClassRule, processed, glyphRecords, len(inputRun), inputMatchIndexes, featureTag)
            if performedAction:
                break
        return processed, glyphRecords, performedAction

    def _evaluateContextItem(self, glyphName, contextTest, additionalObject):
        # the context items of this format are class indexes.
        # additionalObject is the class definition to look
        # the glyph up in.
        classDef = additionalObject
        classIndex = classDef[glyphName]
        return classIndex == contextTest
317 | 
318 | 
class BaseChainingContextFormat3SubTable(BaseChainingContextSubTable):

    """
    Chaining contextual subtable, format 3.

    The backtrack, input and look ahead sequences are each
    described by a list of Coverage objects. This object is
    shared across GSUB and GPOS contextual subtables.
    """

    __slots__ = ["BacktrackGlyphCount", "BacktrackCoverage", "InputGlyphCount",
                "InputCoverage", "LookAheadGlyphCount", "LookAheadCoverage"]

    def __init__(self):
        super(BaseChainingContextFormat3SubTable, self).__init__()
        self.BacktrackGlyphCount = 0
        self.BacktrackCoverage = []
        self.InputGlyphCount = 0
        self.InputCoverage = []
        self.LookAheadGlyphCount = 0
        self.LookAheadCoverage = []

    def loadFromFontTools(self, subtable, lookup):
        """
        Copy the three glyph counts from the fontTools subtable
        and wrap each of its coverage tables in a Coverage object.
        Returns self.
        """
        super(BaseChainingContextFormat3SubTable, self).loadFromFontTools(subtable, lookup)
        # the three sequences are stored under parallel attribute names
        for sequence in ("Backtrack", "Input", "LookAhead"):
            countName = sequence + "GlyphCount"
            coverageName = sequence + "Coverage"
            setattr(self, countName, getattr(subtable, countName))
            coverages = []
            for coverage in getattr(subtable, coverageName):
                coverages.append(Coverage().loadFromFontTools(coverage))
            setattr(self, coverageName, coverages)
        return self

    def process(self, processed, glyphRecords, featureTag):
        """
        Test the backtrack, input and look ahead sequences, in that
        order. As soon as one of them fails the arguments are handed
        back with a False flag. If all of them match, the result of
        _processMatch is returned.
        """
        unchanged = (processed, glyphRecords, False)
        # backtrack testing. the processed records are walked
        # backwards, beginning with the most recent one.
        backtrackCount = self.BacktrackGlyphCount
        if backtrackCount:
            matched, backtrack, backtrackMatchIndexes = self._testContext(reversed(processed), self.BacktrackCoverage, backtrackCount)
            if not matched:
                return unchanged
        # input testing
        # NOTE(review): with a zero InputGlyphCount nothing is bound to
        # inputRecords and the steps below fail. confirm that the count
        # is always at least 1.
        inputCount = self.InputGlyphCount
        if inputCount:
            matched, inputRecords, inputMatchIndexes = self._testContext(glyphRecords, self.InputCoverage, inputCount)
            if not matched:
                return unchanged
        # look ahead testing. the records taken by the input are skipped.
        lookAheadCount = self.LookAheadGlyphCount
        if lookAheadCount:
            matched, lookAhead, lookAheadMatchIndexes = self._testContext(glyphRecords[len(inputRecords):], self.LookAheadCoverage, lookAheadCount)
            if not matched:
                return unchanged
        # match. process.
        processed, glyphRecords, performedAction = self._processMatch(self, processed, glyphRecords, len(inputRecords), inputMatchIndexes, featureTag)
        return processed, glyphRecords, performedAction

    def _evaluateContextItem(self, glyphName, contextTest, additionalObject):
        # in this format each context test is a coverage table
        return glyphName in contextTest
384 | 
385 | 
class BaseLookupRecord(object):

    """
    Common implementation of the GSUB SubstLookupRecord
    and the GPOS PosLookupRecord.
    """

    __slots__ = ["SequenceIndex", "LookupListIndex"]

    def __init__(self):
        self.SequenceIndex = self.LookupListIndex = None

    def loadFromFontTools(self, record):
        """
        Copy SequenceIndex and LookupListIndex from
        the given record. Returns self.
        """
        for name in ("SequenceIndex", "LookupListIndex"):
            setattr(self, name, getattr(record, name))
        return self
403 | 
404 | 
405 | # --------
406 | # Coverage
407 | # --------
408 | 
409 | 
class Coverage(object):

    """
    fontTools abstracts CoverageFormat1 and
    CoverageFormat2 into a common Coverage
    object. The same is done here. Consequently
    the structure of this object does not closely
    follow the specification. Instead, the basic
    functionality is implemented through standard
    container methods.

    To determine if a glyph is in the coverage:
        >>> "x" in coverage
        True

    To get the index for a particular glyph:
        >>> coverage.index("x")
        330

    An object created without any glyphs is an
    empty coverage: it contains no glyph.
    """

    __slots__ = ["_glyphs"]

    def __init__(self, coverage=None):
        """
        coverage is an optional iterable of glyph names.
        The names are copied into a new list.
        """
        # Always store a list. Storing None here made "in",
        # index() and Glyphs fail on an object that had been
        # created without glyphs.
        if coverage is None:
            coverage = []
        self._glyphs = list(coverage)

    def loadFromFontTools(self, coverage):
        """
        Replace the glyph names with the ones found in coverage.
        Returns self.
        """
        # the data coming in could be a fontTools
        # Coverage object or a list of glyph names
        if not isinstance(coverage, list):
            coverage = coverage.glyphs
        self._glyphs = list(coverage)
        return self

    def __contains__(self, glyphName):
        return glyphName in self._glyphs

    def index(self, glyphName):
        """
        Return the position of glyphName in the coverage.
        A ValueError is raised if the glyph is not covered.
        """
        return self._glyphs.index(glyphName)

    def _get_Glyphs(self):
        # hand out a copy so that the internal list can't be modified
        return list(self._glyphs)

    Glyphs = property(_get_Glyphs, doc="This is for reference only. Not for use in processing.")
456 | 


--------------------------------------------------------------------------------
/Lib/compositor/subTablesGSUB.py:
--------------------------------------------------------------------------------
  1 | from random import choice
  2 | from compositor.classDefinitionTables import ClassDef
  3 | from compositor.glyphRecord import glyphNamesToGlyphRecords
  4 | from compositor.subTablesBase import BaseSubTable, BaseLookupRecord, Coverage,\
  5 |     BaseContextFormat1SubTable, BaseContextFormat2SubTable, BaseContextFormat3SubTable,\
  6 |     BaseChainingContextFormat1SubTable, BaseChainingContextFormat2SubTable, BaseChainingContextFormat3SubTable
  7 | 
  8 | 
# The public names of this module.
__all__ = [
        "GSUBLookupType1Format2", "GSUBLookupType2", "GSUBLookupType3", "GSUBLookupType4",
        "GSUBLookupType5Format1", "GSUBLookupType5Format2", "GSUBLookupType5Format3",
        "GSUBLookupType6Format1", "GSUBLookupType6Format2", "GSUBLookupType6Format3",
        "GSUBLookupType7", "GSUBLookupType8"
        ]


# Slot names that are added to the __slots__ declaration
# of the substitution subtable classes in this module.
globalSubstitutionSubTableSlots = ["SubstFormat"]
 18 | 
 19 | 
 20 | # -------------
 21 | # Lookup Type 1
 22 | # -------------
 23 | 
 24 | 
 25 | class GSUBLookupType1Format2(BaseSubTable):
 26 | 
 27 |     """
 28 |     Deviation from spec:
 29 |     - fontTools interprets Lookup Type 1 formats 1 and 2
 30 |       into the same object structure. As such, only format 2
 31 |       is needed.
 32 |     - GlyphCount attribute is not implemented.
 33 |     """
 34 | 
 35 |     __slots__ = ["Coverage", "Substitute"] + globalSubstitutionSubTableSlots
 36 | 
 37 |     def __init__(self):
 38 |         super(GSUBLookupType1Format2, self).__init__()
 39 |         self.SubstFormat = 2
 40 |         self.Substitute = []
 41 |         self.Coverage = None
 42 | 
 43 |     def loadFromFontTools(self, subtable, lookup):
 44 |         super(GSUBLookupType1Format2, self).loadFromFontTools(subtable, lookup)
 45 |         # fontTools has a custom implementation of this
 46 |         # subtable type, so it needs to be converted
 47 |         coverage = []
 48 |         self.Substitute = []
 49 |         for glyphName, alternate in sorted(subtable.mapping.items()):
 50 |             coverage.append(glyphName)
 51 |             self.Substitute.append(alternate)
 52 |         self.Coverage = Coverage().loadFromFontTools(coverage)
 53 |         return self
 54 | 
 55 |     def process(self, processed, glyphRecords, featureTag):
 56 |         performedSub = False
 57 |         currentRecord = glyphRecords[0]
 58 |         currentGlyph = currentRecord.glyphName
 59 |         if currentGlyph in self.Coverage:
 60 |             if not self._lookupFlagCoversGlyph(currentGlyph):
 61 |                 performedSub = True
 62 |                 index = self.Coverage.index(currentGlyph)
 63 |                 substitute = self.Substitute[index]
 64 |                 # special behavior for aalt
 65 |                 if featureTag == "aalt":
 66 |                     if currentRecord._alternatesReference != currentGlyph:
 67 |                         currentRecord._alternatesReference = currentGlyph
 68 |                         currentRecord.alternates = []
 69 |                     currentRecord.alternates.append(substitute)
 70 |                 # standard behavior
 71 |                 else:
 72 |                     currentRecord.saveState(currentRecord.glyphName)
 73 |                     currentRecord.glyphName = substitute
 74 |                 processed.append(currentRecord)
 75 |                 glyphRecords = glyphRecords[1:]
 76 |         return processed, glyphRecords, performedSub
 77 | 
 78 | 
 79 | # -------------
 80 | # Lookup Type 2
 81 | # -------------
 82 | 
 83 | 
 84 | class GSUBLookupType2(BaseSubTable):
 85 | 
 86 |     """
 87 |     Deviation from spec:
 88 |     - SequenceCount attribute is not implemented.
 89 |     """
 90 | 
 91 |     __slots__ = ["Coverage", "Sequence"] + globalSubstitutionSubTableSlots
 92 | 
 93 |     def __init__(self):
 94 |         super(GSUBLookupType2, self).__init__()
 95 |         self.SubstFormat = 1
 96 |         self.Coverage = None
 97 |         self.Sequence = []
 98 | 
 99 |     def loadFromFontTools(self, subtable, lookup):
100 |         super(GSUBLookupType2, self).loadFromFontTools(subtable, lookup)
101 |         try:
102 |             self.Coverage = Coverage().loadFromFontTools(subtable.Coverage)
103 |             self.Sequence = [Sequence().loadFromFontTools(sequence)
104 |                              for sequence in subtable.Sequence]
105 |         except AttributeError:
106 |             # the API for MultipleSubst lookups changed with fonttools 3.1:
107 |             # https://github.com/fonttools/fonttools/pull/364
108 |             mapping = getattr(subtable, "mapping", {})
109 |             coverage = sorted(mapping.keys())
110 |             self.Coverage = Coverage(coverage)
111 |             self.Sequence = [Sequence(mapping[glyph]) for glyph in coverage]
112 |         return self
113 | 
114 |     def process(self, processed, glyphRecords, featureTag):
115 |         performedSub = False
116 |         currentRecord = glyphRecords[0]
117 |         currentGlyph  = currentRecord.glyphName
118 |         if currentGlyph in self.Coverage:
119 |             if not self._lookupFlagCoversGlyph(currentGlyph):
120 |                 # XXX all glyph subsitituion states are destroyed here
121 |                 performedSub = True
122 |                 index = self.Coverage.index(currentGlyph)
123 |                 sequence = self.Sequence[index]
124 |                 substitute = sequence.Substitute
125 |                 substitute = glyphNamesToGlyphRecords(substitute)
126 |                 processed.extend(substitute)
127 |                 glyphRecords = glyphRecords[1:]
128 |         return processed, glyphRecords, performedSub
129 | 
130 | 
class Sequence(object):

    """
    The list of glyph names that replaces one glyph.

    Deviation from spec:
    - GlyphCount attribute is not implemented.
    """

    __slots__ = ["Substitute"]

    def __init__(self, substitute=None):
        if substitute is None:
            substitute = []
        self.Substitute = list(substitute)

    def loadFromFontTools(self, sequence):
        """
        Copy the Substitute list of the given
        fontTools object. Returns self.
        """
        glyphNames = sequence.Substitute
        self.Substitute = list(glyphNames)
        return self
146 | 
147 | # -------------
148 | # Lookup Type 3
149 | # -------------
150 | 
151 | 
class GSUBLookupType3(BaseSubTable):

    """
    Offers a set of alternates for one glyph.

    Deviation from spec: None

    AlternateSetCount is set by loadFromFontTools
    to the number of loaded alternate sets.
    """

    __slots__ = ["Coverage", "AlternateSet", "AlternateSetCount"] + globalSubstitutionSubTableSlots

    def __init__(self):
        super(GSUBLookupType3, self).__init__()
        self.SubstFormat = 1
        self.AlternateSet = []
        self.Coverage = None
        self.AlternateSetCount = 0

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the data from a fontTools subtable. Returns self.
        """
        super(GSUBLookupType3, self).loadFromFontTools(subtable, lookup)
        # fontTools stores this subtable as a mapping of glyph
        # name to alternates. it is split into two parallel lists.
        glyphNames = []
        alternateSets = []
        for glyphName, alternates in subtable.alternates.items():
            glyphNames.append(glyphName)
            alternateSets.append(AlternateSet().loadFromFontTools(alternates))
        self.AlternateSet = alternateSets
        self.Coverage = Coverage().loadFromFontTools(glyphNames)
        self.AlternateSetCount = len(alternateSets)
        return self

    def process(self, processed, glyphRecords, featureTag):
        """
        Try to apply the alternates to the first glyph record.

        Returns the processed records, the records that are still
        to be processed and a flag that tells if an action
        was performed.
        """
        record = glyphRecords[0]
        glyphName = record.glyphName
        if glyphName not in self.Coverage or self._lookupFlagCoversGlyph(glyphName):
            return processed, glyphRecords, False
        alternates = self.AlternateSet[self.Coverage.index(glyphName)].Alternate
        if featureTag == "rand":
            # special behavior for rand: pick one of the alternates
            record.saveState(record.glyphName)
            record.glyphName = choice(alternates)
        else:
            # standard behavior: the alternates are stored
            # in the record, the glyph is not changed
            if record._alternatesReference != glyphName:
                record._alternatesReference = glyphName
                record.alternates = []
            record.alternates.extend(alternates)
        processed.append(record)
        return processed, glyphRecords[1:], True
205 | 
206 | 
class AlternateSet(object):

    """
    The alternate glyph names for one glyph.

    Deviation from spec:
    - GlyphCount attribute is not implemented.
    """

    __slots__ = ["Alternate"]

    def __init__(self):
        self.Alternate = []

    def loadFromFontTools(self, alternates):
        """
        Store a copy of the given glyph names. Returns self.
        """
        glyphNames = list(alternates)
        self.Alternate = glyphNames
        return self
222 | 
223 | 
224 | # -------------
225 | # Lookup Type 4
226 | # -------------
227 | 
228 | 
class GSUBLookupType4(BaseSubTable):

    """
    Replaces a sequence of glyphs with one ligature glyph.

    Deviation from spec:
    - LigSetCount attribute is not implemented.
    """

    __slots__ = ["Coverage", "LigatureSet"] + globalSubstitutionSubTableSlots

    def __init__(self):
        super(GSUBLookupType4, self).__init__()
        self.SubstFormat = 1
        self.LigatureSet = []
        self.Coverage = None

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the data from a fontTools subtable. Returns self.
        """
        super(GSUBLookupType4, self).loadFromFontTools(subtable, lookup)
        # fontTools has a custom implementation of this
        # subtable type, so it needs to be converted
        coverage = []
        self.LigatureSet = []
        for glyphName, ligature in subtable.ligatures.items():
            ligatureSet = LigatureSet().loadFromFontTools(ligature)
            self.LigatureSet.append(ligatureSet)
            coverage.append(glyphName)
        self.Coverage = Coverage().loadFromFontTools(coverage)
        return self

    def process(self, processed, glyphRecords, featureTag):
        """
        Try to form a ligature that begins with the first glyph record.

        The ligatures listed for the first glyph are tried in order
        and the first one that matches is applied. Records that are
        covered by the lookup flag are skipped during matching and
        are kept in the list of records still to be processed.

        Returns the processed records, the records that are still
        to be processed and a flag that tells if a substitution
        was performed.
        """
        currentRecord = glyphRecords[0]
        currentGlyph = currentRecord.glyphName
        lookupFlag = self._lookup().LookupFlag
        if currentGlyph not in self.Coverage or lookupFlag.coversGlyph(currentGlyph):
            return processed, glyphRecords, False
        followingRecords = glyphRecords[1:]
        ligatureSet = self.LigatureSet[self.Coverage.index(currentGlyph)]
        for ligature in ligatureSet.Ligature:
            matchedRecordIndexes = self._matchLigature(ligature, followingRecords, lookupFlag)
            if matchedRecordIndexes is None:
                continue
            currentRecord.saveState([currentGlyph] + ligature.Component)
            currentRecord.glyphName = ligature.LigGlyph
            currentRecord.ligatureComponents = [currentGlyph] + ligature.Component
            processed.append(currentRecord)
            # drop the records that were merged into the ligature
            glyphRecords = [record for index, record in enumerate(followingRecords) if index not in matchedRecordIndexes]
            return processed, glyphRecords, True
        return processed, glyphRecords, False

    def _matchLigature(self, ligature, followingRecords, lookupFlag):
        """
        Match the components of ligature, other than the first one,
        against followingRecords.

        Returns the set of indexes of the records that matched
        or None if the ligature does not match.
        """
        # Component does not include the first glyph
        remaining = ligature.CompCount - 1
        if remaining < 0:
            return None
        matchedRecordIndexes = set()
        if remaining == 0:
            # a single component ligature is matched by the first
            # glyph alone. looking at the following records would
            # read past the end of the empty component list.
            return matchedRecordIndexes
        component = ligature.Component
        for index, glyphRecord in enumerate(followingRecords):
            glyphName = glyphRecord.glyphName
            if lookupFlag.coversGlyph(glyphName):
                continue
            if glyphName != component[len(matchedRecordIndexes)]:
                return None
            matchedRecordIndexes.add(index)
            if len(matchedRecordIndexes) == remaining:
                return matchedRecordIndexes
        # ran out of records before all components were found
        return None
295 | 
296 | 
class LigatureSet(object):

    """
    The ligatures that begin with one particular glyph.

    Deviation from spec: None
    """

    __slots__ = ["LigatureCount", "Ligature"]

    def __init__(self):
        self.Ligature = []
        self.LigatureCount = 0

    def loadFromFontTools(self, ligatures):
        """
        Convert each of the given fontTools ligatures into
        a Ligature object and update LigatureCount. Returns self.
        """
        converted = []
        for ligature in ligatures:
            converted.append(Ligature().loadFromFontTools(ligature))
        self.Ligature = converted
        self.LigatureCount = len(converted)
        return self
313 | 
314 | 
class Ligature(object):

    """
    One ligature: the ligature glyph and its components.

    Deviation from spec: None
    """

    __slots__ = ["LigGlyph", "CompCount", "Component"]

    def __init__(self):
        self.CompCount = self.LigGlyph = None
        self.Component = []

    def loadFromFontTools(self, ligature):
        """
        Copy CompCount, LigGlyph and Component from the given
        fontTools object. Returns self.
        """
        for name in ("CompCount", "LigGlyph"):
            setattr(self, name, getattr(ligature, name))
        self.Component = list(ligature.Component)
        return self
333 | 
334 | 
335 | # -------------
336 | # Lookup Type 5
337 | # -------------
338 | 
339 | 
class GSUBLookupType5Format1(BaseContextFormat1SubTable):

    """
    GSUB contextual subtable, format 1.

    Deviation from spec:
    - SubRuleSetCount attribute is not implemented.

    A private attribute is implemented:
    _RuleSet - The value of SubRuleSet

    The private attribute is needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["Coverage", "SubRuleSet"] + globalSubstitutionSubTableSlots

    def __init__(self):
        super(GSUBLookupType5Format1, self).__init__()
        self.SubstFormat = 1
        self.Coverage = None
        self.SubRuleSet = []

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the coverage and the rule sets from
        a fontTools subtable. Returns self.
        """
        super(GSUBLookupType5Format1, self).loadFromFontTools(subtable, lookup)
        self.Coverage = Coverage().loadFromFontTools(subtable.Coverage)
        ruleSets = []
        for subRuleSet in subtable.SubRuleSet:
            ruleSets.append(SubRuleSet().loadFromFontTools(subRuleSet))
        self.SubRuleSet = ruleSets
        return self

    def _get_RuleSet(self):
        return self.SubRuleSet

    _RuleSet = property(_get_RuleSet)
371 | 
372 | 
class SubRuleSet(object):

    """
    A list of SubRule objects.

    Deviation from spec:
    - SubRuleCount attribute is not implemented.

    A private attribute is implemented:
    _Rule - The value of SubRule

    The private attribute is needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["SubRule"]

    def __init__(self):
        self.SubRule = []

    def loadFromFontTools(self, subRuleSet):
        """
        Convert each rule of the given fontTools
        object into a SubRule. Returns self.
        """
        rules = []
        for subRule in subRuleSet.SubRule:
            rules.append(SubRule().loadFromFontTools(subRule))
        self.SubRule = rules
        return self

    def _get_Rule(self):
        return self.SubRule

    _Rule = property(_get_Rule)
399 | 
400 | 
class SubRule(object):

    """
    One rule of a format 1 contextual subtable.

    Deviation from spec: None

    Two private attributes are implemented:
    _ActionCount - The value of SubstCount
    _ActionLookupRecord - The value of SubstLookupRecord

    The private attributes are needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["Input", "GlyphCount", "SubstCount", "SubstLookupRecord"]

    def __init__(self):
        self.Input = []
        self.GlyphCount = self.SubstCount = 0
        self.SubstLookupRecord = []

    def loadFromFontTools(self, subRule):
        """
        Copy the input sequence and the counts from the given
        fontTools object and convert its lookup records. Returns self.
        """
        self.Input = list(subRule.Input)
        for name in ("GlyphCount", "SubstCount"):
            setattr(self, name, getattr(subRule, name))
        records = []
        for record in subRule.SubstLookupRecord:
            records.append(SubstLookupRecord().loadFromFontTools(record))
        self.SubstLookupRecord = records
        return self

    def _get_ActionCount(self):
        return self.SubstCount

    _ActionCount = property(_get_ActionCount)

    def _get_ActionLookupRecord(self):
        return self.SubstLookupRecord

    _ActionLookupRecord = property(_get_ActionLookupRecord)
438 | 
439 | 
class GSUBLookupType5Format2(BaseContextFormat2SubTable):

    """
    GSUB contextual subtable, format 2.

    Deviation from spec:
    - SubClassSetCnt attribute is not implemented.

    A private attribute is implemented:
    _ClassSet - The value of SubClassSet
    """

    __slots__ = ["Coverage", "ClassDef", "SubClassSet"] + globalSubstitutionSubTableSlots

    def __init__(self):
        super(GSUBLookupType5Format2, self).__init__()
        self.SubstFormat = 2
        self.Coverage = None
        self.ClassDef = None
        self.SubClassSet = []

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the coverage, the class definition and the class
        sets from a fontTools subtable. Returns self.
        """
        super(GSUBLookupType5Format2, self).loadFromFontTools(subtable, lookup)
        self.Coverage = Coverage().loadFromFontTools(subtable.Coverage)
        self.ClassDef = ClassDef().loadFromFontTools(subtable.ClassDef)
        # a class set can be None. this is retained so that
        # the positions in the list do not shift.
        self.SubClassSet = [
            None if subClassSet is None else SubClassSet().loadFromFontTools(subClassSet)
            for subClassSet in subtable.SubClassSet
        ]
        return self

    def _get_ClassSet(self):
        return self.SubClassSet

    _ClassSet = property(_get_ClassSet)
472 | 
473 | 
class SubClassSet(object):

    """
    A list of SubClassRule objects.

    Deviation from spec:
    - SubClassRuleCnt attribute is not implemented.

    A private attribute is implemented:
    _ClassRule - The value of SubClassRule
    """

    __slots__ = ["SubClassRule"]

    def __init__(self):
        self.SubClassRule = []

    def loadFromFontTools(self, subClassSet):
        """
        Convert each rule of the given fontTools
        object into a SubClassRule. Returns self.
        """
        rules = []
        for subClassRule in subClassSet.SubClassRule:
            rules.append(SubClassRule().loadFromFontTools(subClassRule))
        self.SubClassRule = rules
        return self

    def _get_ClassRule(self):
        return self.SubClassRule

    _ClassRule = property(_get_ClassRule)
494 | 
495 | 
class SubClassRule(object):

    """
    One rule of a format 2 contextual subtable.

    Deviation from spec: None

    Two private attributes are implemented:
    _ActionCount - The value of SubstCount
    _ActionLookupRecord - The value of SubstLookupRecord

    The private attributes are needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["Class", "GlyphCount", "SubstCount", "SubstLookupRecord"]

    def __init__(self):
        self.Class = []
        self.GlyphCount = self.SubstCount = 0
        self.SubstLookupRecord = []

    def loadFromFontTools(self, subClassRule):
        """
        Copy the class sequence and the counts from the given
        fontTools object and convert its lookup records. Returns self.
        """
        self.Class = list(subClassRule.Class)
        for name in ("GlyphCount", "SubstCount"):
            setattr(self, name, getattr(subClassRule, name))
        records = []
        for record in subClassRule.SubstLookupRecord:
            records.append(SubstLookupRecord().loadFromFontTools(record))
        self.SubstLookupRecord = records
        return self

    def _get_ActionCount(self):
        return self.SubstCount

    _ActionCount = property(_get_ActionCount)

    def _get_ActionLookupRecord(self):
        return self.SubstLookupRecord

    _ActionLookupRecord = property(_get_ActionLookupRecord)
533 | 
534 | 
class GSUBLookupType5Format3(BaseContextFormat3SubTable):

    """
    GSUB contextual subtable, format 3.

    Deviation from spec: None

    Two private attributes are implemented:
    _ActionCount - The value of SubstCount
    _ActionLookupRecord - The value of SubstLookupRecord

    The private attributes are needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    def __init__(self):
        super(GSUBLookupType5Format3, self).__init__()
        self.SubstFormat = 3
        self.Coverage = []
        self.GlyphCount = 0
        self.SubstCount = 0
        self.SubstLookupRecord = []

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the coverage tables, the counts and the lookup
        records from a fontTools subtable. Returns self.
        """
        super(GSUBLookupType5Format3, self).loadFromFontTools(subtable, lookup)
        coverages = []
        for coverage in subtable.Coverage:
            coverages.append(Coverage().loadFromFontTools(coverage))
        self.Coverage = coverages
        self.GlyphCount = subtable.GlyphCount
        self.SubstCount = subtable.SubstCount
        records = []
        for record in subtable.SubstLookupRecord:
            records.append(SubstLookupRecord().loadFromFontTools(record))
        self.SubstLookupRecord = records
        return self

    def _get_ActionCount(self):
        return self.SubstCount

    _ActionCount = property(_get_ActionCount)

    def _get_ActionLookupRecord(self):
        return self.SubstLookupRecord

    _ActionLookupRecord = property(_get_ActionLookupRecord)
573 | 
574 | 
575 | # -------------
576 | # Lookup Type 6
577 | # -------------
578 | 
579 | 
class GSUBLookupType6Format1(BaseChainingContextFormat1SubTable):

    """
    GSUB chaining contextual subtable, format 1.

    Deviation from spec:
    - ChainSubRuleSetCount attribute is not implemented.

    A private attribute is implemented:
    _ChainRuleSet - The value of ChainSubRuleSet

    The private attribute is needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["Coverage", "ChainSubRuleSet"] + globalSubstitutionSubTableSlots

    def __init__(self):
        super(GSUBLookupType6Format1, self).__init__()
        self.SubstFormat = 1
        self.Coverage = None
        self.ChainSubRuleSet = []

    def loadFromFontTools(self, subtable, lookup):
        """
        Load the coverage and the chain rule sets from
        a fontTools subtable. Returns self.
        """
        super(GSUBLookupType6Format1, self).loadFromFontTools(subtable, lookup)
        self.Coverage = Coverage().loadFromFontTools(subtable.Coverage)
        ruleSets = []
        for chainSubRuleSet in subtable.ChainSubRuleSet:
            ruleSets.append(ChainSubRuleSet().loadFromFontTools(chainSubRuleSet))
        self.ChainSubRuleSet = ruleSets
        return self

    def _get_ChainRuleSet(self):
        return self.ChainSubRuleSet

    _ChainRuleSet = property(_get_ChainRuleSet)
611 | 
612 | 
class ChainSubRuleSet(object):

    """
    A list of ChainSubRule objects.

    Deviation from spec:
    - ChainSubRuleCount attribute is not implemented.

    A private attribute is implemented:
    _ChainRule - The value of ChainSubRule

    The private attribute is needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["ChainSubRule"]

    def __init__(self):
        self.ChainSubRule = []

    def loadFromFontTools(self, chainSubRuleSet):
        """
        Convert each rule of the given fontTools
        object into a ChainSubRule. Returns self.
        """
        rules = []
        for chainSubRule in chainSubRuleSet.ChainSubRule:
            rules.append(ChainSubRule().loadFromFontTools(chainSubRule))
        self.ChainSubRule = rules
        return self

    def _get_ChainRule(self):
        return self.ChainSubRule

    _ChainRule = property(_get_ChainRule)
639 | 
640 | 
class ChainSubRule(object):

    """
    One rule of a format 1 chaining contextual subtable.

    Deviation from spec: None

    Two private attributes are implemented:
    _ActionCount - The value of SubstCount
    _ActionLookupRecord - The value of SubstLookupRecord

    The private attributes are needed because the contextual subtable processing
    is abstracted so that it can be shared between GSUB and GPOS.
    """

    __slots__ = ["BacktrackGlyphCount", "Backtrack", "InputGlyphCount", "Input",
                "LookAheadGlyphCount", "LookAhead",
                "SubstCount", "SubstLookupRecord",]

    def __init__(self):
        self.BacktrackGlyphCount = 0
        self.Backtrack = []
        self.InputGlyphCount = 0
        self.Input = []
        self.LookAheadGlyphCount = 0
        self.LookAhead = []
        self.SubstCount = 0
        self.SubstLookupRecord = []

    def loadFromFontTools(self, chainSubRule):
        """
        Copy the three sequences and their counts from the given
        fontTools object and convert its lookup records. Returns self.
        """
        # the three sequences are stored under parallel attribute names
        for sequence in ("Backtrack", "Input", "LookAhead"):
            countName = sequence + "GlyphCount"
            setattr(self, countName, getattr(chainSubRule, countName))
            setattr(self, sequence, list(getattr(chainSubRule, sequence)))
        self.SubstCount = chainSubRule.SubstCount
        records = []
        for record in chainSubRule.SubstLookupRecord:
            records.append(SubstLookupRecord().loadFromFontTools(record))
        self.SubstLookupRecord = records
        return self

    def _get_ActionCount(self):
        return self.SubstCount

    _ActionCount = property(_get_ActionCount)

    def _get_ActionLookupRecord(self):
        return self.SubstLookupRecord

    _ActionLookupRecord = property(_get_ActionLookupRecord)
688 | 
689 | 
690 | class GSUBLookupType6Format2(BaseChainingContextFormat2SubTable):
691 | 
692 |     """
693 |     Deviation from spec:
694 |     -ChainSubClassSetCnt attribute is not implemented.
695 | 
696 |     A private attribute is implemented:
697 |     _ChainClassSet - The value of ChainPosClassSet
698 | 
699 |     The private attribute is needed because the contextual subtable processing
700 |     is abstracted so that it can be shared between GSUB and GPOS.
701 |     """
702 | 
703 |     __slots__ = ["Coverage", "BacktrackClassDef", "InputClassDef",
704 |         "LookAheadClassDef", "ChainSubClassSet"] + globalSubstitutionSubTableSlots
705 | 
706 |     def __init__(self):
707 |         super(GSUBLookupType6Format2, self).__init__()
708 |         self.SubstFormat = 2
709 |         self.Coverage = None
710 |         self.BacktrackClassDef = None
711 |         self.InputClassDef = None
712 |         self.LookAheadClassDef = None
713 |         self.ChainSubClassSet = []
714 | 
715 |     def loadFromFontTools(self, subtable, lookup):
716 |         super(GSUBLookupType6Format2, self).loadFromFontTools(subtable, lookup)
717 |         self.Coverage = Coverage().loadFromFontTools(subtable.Coverage)
718 |         self.BacktrackClassDef = ClassDef().loadFromFontTools(subtable.BacktrackClassDef)
719 |         self.InputClassDef = ClassDef().loadFromFontTools(subtable.InputClassDef)
720 |         self.LookAheadClassDef = ClassDef().loadFromFontTools(subtable.LookAheadClassDef)
721 |         self.ChainSubClassSet = []
722 |         for chainSubClassSet in subtable.ChainSubClassSet:
723 |             if chainSubClassSet is None:
724 |                 self.ChainSubClassSet.append(None)
725 |             else:
726 |                 self.ChainSubClassSet.append(ChainSubClassSet().loadFromFontTools(chainSubClassSet))
727 |         return self
728 | 
729 |     def _get_ChainClassSet(self):
730 |         return self.ChainSubClassSet
731 | 
732 |     _ChainClassSet = property(_get_ChainClassSet)
733 | 
734 | 
735 | class ChainSubClassSet(object):
736 | 
737 |     """
738 |     Deviation from spec:
739 |     -ChainSubClassRuleCnt attribute is not implemented.
740 | 
741 |     A private attribute is implemented:
742 |     _ChainClassRule - The value of ChainSubClassRule
743 | 
744 |     The private attribute is needed because the contextual subtable processing
745 |     is abstracted so that it can be shared between GSUB and GPOS.
746 |     """
747 | 
748 |     __slots__ = ["ChainSubClassRule"]
749 | 
750 |     def __init__(self):
751 |         self.ChainSubClassRule = None
752 | 
753 |     def loadFromFontTools(self, chainSubClassSet):
754 |         self.ChainSubClassRule = [ChainSubClassRule().loadFromFontTools(chainSubClassRule) for chainSubClassRule in chainSubClassSet.ChainSubClassRule]
755 |         return self
756 | 
757 |     def _get_ChainClassRule(self):
758 |         return self.ChainSubClassRule
759 | 
760 |     _ChainClassRule = property(_get_ChainClassRule)
761 | 
762 | 
763 | class ChainSubClassRule(object):
764 | 
765 |     """
766 |     Deviation from spec: None
767 | 
768 |     Two private attributes are implemented:
769 |     _ActionCount - The value of SubstCount
770 |     _ActionLookupRecord - The value of SubstLookupRecord
771 | 
772 |     The private attributes are needed because the contextual subtable processing
773 |     is abstracted so that it can be shared between GSUB and GPOS.
774 |     """
775 | 
776 |     __slots__ = ["BacktrackGlyphCount", "Backtrack",
777 |         "InputGlyphCount", "Input",
778 |         "LookAheadGlyphCount", "LookAhead",
779 |         "SubstCount", "SubstLookupRecord"]
780 | 
781 |     def __init__(self):
782 |         self.BacktrackGlyphCount = 0
783 |         self.Backtrack = []
784 |         self.InputGlyphCount = 0
785 |         self.Input = []
786 |         self.LookAheadGlyphCount = 0
787 |         self.LookAhead = []
788 |         self.SubstCount = 0
789 |         self.SubstLookupRecord = []
790 | 
791 |     def loadFromFontTools(self, chainSubClassRule):
792 |         self.BacktrackGlyphCount = chainSubClassRule.BacktrackGlyphCount
793 |         self.Backtrack = list(chainSubClassRule.Backtrack)
794 |         self.InputGlyphCount = chainSubClassRule.InputGlyphCount
795 |         self.Input = list(chainSubClassRule.Input)
796 |         self.LookAheadGlyphCount = chainSubClassRule.LookAheadGlyphCount
797 |         self.LookAhead = list(chainSubClassRule.LookAhead)
798 |         self.SubstCount = chainSubClassRule.SubstCount
799 |         self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in chainSubClassRule.SubstLookupRecord]
800 |         return self
801 | 
802 |     def _get_ActionCount(self):
803 |         return self.SubstCount
804 | 
805 |     _ActionCount = property(_get_ActionCount)
806 | 
807 |     def _get_ActionLookupRecord(self):
808 |         return self.SubstLookupRecord
809 | 
810 |     _ActionLookupRecord = property(_get_ActionLookupRecord)
811 | 
812 | 
813 | class GSUBLookupType6Format3(BaseChainingContextFormat3SubTable):
814 | 
815 |     """
816 |     Deviation from spec: None
817 | 
818 |     Two private attributes are implemented:
819 |     _ActionCount - The value of SubstCount
820 |     _ActionLookupRecord - The value of SubstLookupRecord
821 | 
822 |     The private attributes are needed because the contextual subtable processing
823 |     is abstracted so that it can be shared between GSUB and GPOS.
824 |     """
825 | 
826 |     __slots__ = ["BacktrackGlyphCount", "BacktrackCoverage", "InputGlyphCount", "InputCoverage"
827 |                 "LookaheadGlyphCount", "LookaheadCoverage",
828 |                 "SubstCount", "SubstLookupRecord"] + globalSubstitutionSubTableSlots
829 | 
830 |     def __init__(self):
831 |         super(GSUBLookupType6Format3, self).__init__()
832 |         self.SubstFormat = 3
833 |         self.SubstCount = 0
834 |         self.SubstLookupRecord = []
835 | 
836 |     def loadFromFontTools(self, subtable, lookup):
837 |         super(GSUBLookupType6Format3, self).loadFromFontTools(subtable, lookup)
838 |         self.SubstCount = subtable.SubstCount
839 |         self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in subtable.SubstLookupRecord]
840 |         return self
841 | 
842 |     def _get_ActionCount(self):
843 |         return self.SubstCount
844 | 
845 |     _ActionCount = property(_get_ActionCount)
846 | 
847 |     def _get_ActionLookupRecord(self):
848 |         return self.SubstLookupRecord
849 | 
850 |     _ActionLookupRecord = property(_get_ActionLookupRecord)
851 | 
852 | 
853 | class SubstLookupRecord(BaseLookupRecord): pass  # GSUB name for the shared lookup record; adds nothing to the base class
854 | 
855 | 
856 | # -------------
857 | # Lookup Type 7
858 | # -------------
859 | 
860 | 
861 | class GSUBLookupType7(BaseSubTable):
862 | 
863 |     """
864 |     Deviation from spec:
865 |     - ExtensionOffset attribute is not implemented. In its place
866 |       is the ExtSubTable attribute. That attribute references
867 |       the subtable that should be used for processing.
868 |     """
869 | 
870 |     __slots__ = ["ExtensionLookupType", "ExtSubTable"] + globalSubstitutionSubTableSlots
871 | 
872 |     def __init__(self):
873 |         self.SubstFormat = 1
874 |         self.ExtSubTable = None
875 | 
876 |     def loadFromFontTools(self, subtable, lookup):
877 |         super(GSUBLookupType7, self).loadFromFontTools(subtable, lookup)
878 |         self.ExtensionLookupType = subtable.ExtensionLookupType
879 |         lookupType = self.ExtensionLookupType
880 |         if lookupType == 1:
881 |             cls = GSUBLookupType1Format2
882 |         elif lookupType == 2:
883 |             cls = GSUBLookupType2
884 |         elif lookupType == 3:
885 |             cls = GSUBLookupType3
886 |         elif lookupType == 4:
887 |             cls = GSUBLookupType4
888 |         elif lookupType == 5:
889 |             cls = (GSUBLookupType5Format1, GSUBLookupType5Format2, GSUBLookupType5Format3)[subtable.ExtSubTable.Format-1]
890 |         elif lookupType == 6:
891 |             cls = (GSUBLookupType6Format1, GSUBLookupType6Format2, GSUBLookupType6Format3)[subtable.ExtSubTable.Format-1]
892 |         elif lookupType == 7:
893 |             cls = GSUBLookupType7
894 |         elif lookupType == 8:
895 |             cls = GSUBLookupType8
896 |         self.ExtSubTable = cls().loadFromFontTools(subtable.ExtSubTable, lookup)
897 |         return self
898 | 
899 |     def process(self, processed, glyphRecords, featureTag):
900 |         return self.ExtSubTable.process(processed, glyphRecords, featureTag)
901 | 
902 | 
903 | # -------------
904 | # Lookup Type 8
905 | # -------------
906 | 
907 | 
908 | class GSUBLookupType8(BaseSubTable): pass  # placeholder: no loading or processing of its own is defined here
909 | 


--------------------------------------------------------------------------------
/Lib/compositor/tables.py:
--------------------------------------------------------------------------------
  1 | """
  2 | GSUB, GPOS and GDEF table objects.
  3 | """
  4 | 
  5 | import unicodedata
  6 | from compositor.cmap import reverseCMAP
  7 | from compositor.scriptList import ScriptList
  8 | from compositor.featureList import FeatureList
  9 | from compositor.lookupList import GSUBLookupList, GPOSLookupList
 10 | from compositor.subTablesBase import Coverage
 11 | from compositor.classDefinitionTables import MarkAttachClassDef, GlyphClassDef
 12 | from compositor.textUtilities import isWordBreakBefore, isWordBreakAfter
 13 | 
 14 | 
 15 | defaultOnFeatures = [  # tags switched on by default; BaseTable only tests membership, so list order is not significant
 16 |     # GSUB
 17 |     "calt",
 18 |     "ccmp", # this should always be the first feature processed
 19 |     "clig",
 20 |     "fina",
 21 |     "half", # applies only to indic
 22 |     "init",
 23 |     "isol",
 24 |     "liga",
 25 |     "locl",
 26 |     "med2", # applies only to syriac
 27 |     "medi",
 28 |     "nukt", # applies only to indic
 29 |     "pref", # applies only to khmer and myanmar
 30 |     "pres", # applies only to indic
 31 |     "pstf", # applies only to indic
 32 |     "psts",
 33 |     "rand",
 34 |     "rlig", # applies only to arabic and syriac
 35 |     "rphf", # applies only to indic
 36 |     "tjmo", # applies only to hangul
 37 |     "vatu", # applies only to indic
 38 |     "vjmo", # applies only to hangul
 39 |     # GPOS
 40 |     "abvm",  # applies only to indic
 41 |     "blwm",  # applies only to indic
 42 |     "kern",
 43 |     "mark",
 44 |     "mkmk",
 45 |     "opbd",
 46 |     "vkrn"
 47 | ]
 48 | 
 49 | 
 50 | class BaseTable(object):
 51 | 
 52 |     def __init__(self, reversedCMAP={}):
 53 |         self.ScriptList = None
 54 |         self.FeatureList = None
 55 |         self.LookupList = None
 56 | 
 57 |         self._cmap = reversedCMAP
 58 | 
 59 |         self._featureApplicationStates = {}
 60 |         self._applicableFeatureCache = {}
 61 |         self._featureTags = None
 62 | 
 63 |     def loadFromFontTools(self, table, reversedCMAP, gdef):
 64 |         self._cmap = reversedCMAP
 65 |         self.ScriptList = ScriptList().loadFromFontTools(table.table.ScriptList)
 66 |         self.FeatureList = FeatureList().loadFromFontTools(table.table.FeatureList)
 67 |         self.LookupList = self._LookupListClass().loadFromFontTools(table.table.LookupList, gdef)
 68 |         self.getFeatureList()
 69 |         self._setDefaultFeatureApplicationStates()
 70 |         return self
 71 | 
 72 |     def setCMAP(self, reversedCMAP):
 73 |         self._cmap = reversedCMAP
 74 | 
 75 |     def process(self, glyphRecords, script="latn", langSys=None, logger=None):
 76 |         """
 77 |         Pass the list of GlyphRecord objects through the features
 78 |         applicable for the given script and langSys. This returns
 79 |         a list of processed GlyphRecord objects.
 80 |         """
 81 |         applicableLookups = self._preprocess(script, langSys)
 82 |         if logger:
 83 |             logger.logApplicableLookups(self, applicableLookups)
 84 |             logger.logProcessingStart()
 85 |         result = self._processLookups(glyphRecords, applicableLookups, logger=logger)
 86 |         if logger:
 87 |             logger.logProcessingEnd()
 88 |         return result
 89 | 
 90 |     # ------------------
 91 |     # feature management
 92 |     # ------------------
 93 | 
 94 |     def _setDefaultFeatureApplicationStates(self):
 95 |         """
 96 |         Activate all features defined as on by
 97 |         default in the Layout Tag Registry.
 98 |         """
 99 |         for tag in self._featureTags:
100 |             if tag in defaultOnFeatures:
101 |                 state = True
102 |             else:
103 |                 state = False
104 |             self._featureApplicationStates[tag] = state
105 | 
106 |     def __contains__(self, featureTag):
107 |         return featureTag in self._featureTags
108 | 
109 |     def getScriptList(self):
110 |         """
111 |         Get a list of all available scripts in the table.
112 |         """
113 |         found = []
114 |         for scriptRecord in self.ScriptList.ScriptRecord:
115 |             scriptTag = scriptRecord.ScriptTag
116 |             if scriptTag not in found:
117 |                 found.append(scriptTag)
118 |         return found
119 | 
120 |     def getLanguageList(self):
121 |         """
122 |         Get a list of all available languages in the table.
123 |         """
124 |         found = []
125 |         for scriptRecord in self.ScriptList.ScriptRecord:
126 |             script = scriptRecord.Script
127 |             if script.LangSysCount:
128 |                 for langSysRecord in script.LangSysRecord:
129 |                     langSysTag = langSysRecord.LangSysTag
130 |                     if langSysTag not in found:
131 |                         found.append(langSysTag)
132 |         return found
133 | 
134 |     def getFeatureList(self):
135 |         """
136 |         Get a list of all available features in the table.
137 |         """
138 |         if self._featureTags is None:
139 |             featureList = self.FeatureList
140 |             featureRecords = featureList.FeatureRecord
141 |             self._featureTags = []
142 |             for featureRecord in featureRecords:
143 |                 tag = featureRecord.FeatureTag
144 |                 if tag not in self._featureTags:
145 |                     self._featureTags.append(tag)
146 |         return self._featureTags
147 | 
148 |     def getFeatureState(self, featureTag):
149 |         """
150 |         Get a boolean representing if a feature is on or not.
151 |         """
152 |         return self._featureApplicationStates[featureTag]
153 | 
154 |     def setFeatureState(self, featureTag, state):
155 |         """
156 |         Set the application state of a feature.
157 |         """
158 |         self._featureApplicationStates[featureTag] = state
159 | 
160 |     # -------------
161 |     # preprocessing
162 |     # -------------
163 | 
164 |     def _preprocess(self, script, langSys):
165 |         """
166 |         Get a list of ordered (featureTag, lookupObject)
167 |         for the given script and langSys.
168 |         """
169 |         # 1. get a list of applicable feature records
170 |         #    based on the script and langSys
171 |         features = self._getApplicableFeatures(script, langSys)
172 |         # 2. get a list of applicable lookup tables based on the
173 |         #    found features and the feature application states
174 |         lookupIndexes = set()
175 |         for feature in features:
176 |             featureTag = feature.FeatureTag
177 |             if not self._featureApplicationStates[featureTag]:
178 |                 continue
179 |             featureRecord = feature.Feature
180 |             if featureRecord.LookupCount:
181 |                 for lookupIndex in featureRecord.LookupListIndex:
182 |                     lookupIndexes.add((lookupIndex, featureTag))
183 |         # 3. get a list of ordered lookup records for each feature
184 |         lookupList = self.LookupList
185 |         lookupRecords = lookupList.Lookup
186 |         applicableLookups = []
187 |         for lookupIndex, featureTag in sorted(lookupIndexes):
188 |             lookup = lookupRecords[lookupIndex]
189 |             applicableLookups.append((featureTag, lookup))
190 |         return applicableLookups
191 | 
192 |     def _getApplicableFeatures(self, script, langSys):
193 |         """
194 |         Get a list of features that apply to
195 |         a particular script and langSys. Both
196 |         script and langSys can be None. However,
197 |         if script is None and no script record
198 |         in the font is assigned to DFLT, no
199 |         features wil be found.
200 |         """
201 |         # first check to see if this has already been found
202 |         if (script, langSys) in self._applicableFeatureCache:
203 |             return self._applicableFeatureCache[script, langSys]
204 |         scriptList = self.ScriptList
205 |         # 1. Find the appropriate script record
206 |         scriptRecords = scriptList.ScriptRecord
207 |         defaultScript = None
208 |         applicableScript = None
209 |         for scriptRecord in scriptRecords:
210 |             scriptTag = scriptRecord.ScriptTag
211 |             if scriptTag == "DFLT":
212 |                 defaultScript = scriptRecord.Script
213 |                 continue
214 |             if scriptTag == script:
215 |                 applicableScript = scriptRecord.Script
216 |                 break
217 |         # 2. if no suitable script record was found, return an empty list
218 |         if applicableScript is None:
219 |             applicableScript = defaultScript
220 |         if applicableScript is None:
221 |             return []
222 |         # 3. get the applicable langSys records
223 |         defaultLangSys = applicableScript.DefaultLangSys
224 |         specificLangSys = None
225 |         # if we have a langSys and the table
226 |         # defines specific langSys behavior,
227 |         # try to find a matching langSys record
228 |         if langSys is not None and applicableScript.LangSysCount:
229 |             for langSysRecord in applicableScript.LangSysRecord:
230 |                 langSysTag = langSysRecord.LangSysTag
231 |                 if langSysTag == langSys:
232 |                     specificLangSys = langSysRecord.LangSys
233 |                     break
234 |         # 4. get the list of applicable features
235 |         applicableFeatures = set()
236 |         if specificLangSys is None:
237 |             if defaultLangSys is not None:
238 |                 if defaultLangSys.FeatureCount:
239 |                     applicableFeatures |= set(defaultLangSys.FeatureIndex)
240 |                 if defaultLangSys.ReqFeatureIndex != 0xFFFF:
241 |                     applicableFeatures.add(defaultLangSys.ReqFeatureIndex)
242 |         else:
243 |             if specificLangSys.FeatureCount:
244 |                 applicableFeatures |= set(specificLangSys.FeatureIndex)
245 |             if specificLangSys.ReqFeatureIndex != 0xFFFF:
246 |                 applicableFeatures.add(specificLangSys.ReqFeatureIndex)
247 |         applicableFeatures = self._getFeatures(applicableFeatures)
248 |         # store the found features for potential use by this method
249 |         self._applicableFeatureCache[script, langSys] = applicableFeatures
250 |         return applicableFeatures
251 | 
252 |     def _getFeatures(self, indices):
253 |         """
254 |         Get a list of ordered features located at indices.
255 |         """
256 |         featureList = self.FeatureList
257 |         featureRecords = featureList.FeatureRecord
258 |         features = [featureRecords[index] for index in sorted(indices)]
259 |         return features
260 | 
261 |     def _getLookups(self, indices):
262 |         """
263 |         Get a list of ordered lookups at indices
264 |         """
265 |         lookupList = self.LookupList
266 |         lookupRecords = lookupList.Lookup
267 |         lookups = [lookupRecords[index] for index in sorted(indices)]
268 |         return lookups
269 | 
270 |     # ----------
271 |     # processing
272 |     # ----------
273 | 
274 |     def _processLookups(self, glyphRecords, lookups, processingAalt=False, logger=None):
275 |         aaltHolding = []
276 |         boundarySensitive = set(["init", "medi", "fina", "isol"])
277 |         for featureTag, lookup in lookups:
278 |             # store aalt for processing at the end
279 |             if not processingAalt and featureTag == "aalt":
280 |                 aaltHolding.append((featureTag, lookup))
281 |                 continue
282 |             if logger:
283 |                 logger.logLookupStart(self, featureTag, lookup)
284 |             processed = []
285 |             # loop through the glyph records
286 |             while glyphRecords:
287 |                 skip = False
288 |                 if featureTag in boundarySensitive:
289 |                     side1GlyphNames = [r.getSide1GlyphNameWithUnicodeValue(self._cmap) for r in processed] + [r.getSide1GlyphNameWithUnicodeValue(self._cmap) for r in glyphRecords]
290 |                     side2GlyphNames = [r.getSide2GlyphNameWithUnicodeValue(self._cmap) for r in processed] + [r.getSide2GlyphNameWithUnicodeValue(self._cmap) for r in glyphRecords]
291 |                     index = len(processed)
292 |                     wordBreakBefore = isWordBreakBefore(side1GlyphNames, index, self._cmap)
293 |                     wordBreakAfter = isWordBreakAfter(side2GlyphNames, index, self._cmap)
294 |                 if featureTag == "init":
295 |                     if not wordBreakBefore or wordBreakAfter:
296 |                         skip = True
297 |                 elif featureTag == "medi":
298 |                     if wordBreakBefore or wordBreakAfter:
299 |                         skip = True
300 |                 elif featureTag == "fina":
301 |                     if wordBreakBefore or not wordBreakAfter:
302 |                         skip = True
303 |                 elif featureTag == "isol":
304 |                     if not wordBreakBefore or not wordBreakAfter:
305 |                         skip = True
306 |                 # loop through the lookups subtables
307 |                 performedAction = False
308 |                 if not skip:
309 |                     processed, glyphRecords, performedAction = self._processLookup(processed, glyphRecords, lookup, featureTag, logger=logger)
310 |                 if not performedAction:
311 |                     processed.append(glyphRecords[0])
312 |                     glyphRecords = glyphRecords[1:]
313 |             glyphRecords = processed
314 |             if logger:
315 |                 logger.logLookupEnd()
316 |         # process aalt for the final glyph records
317 |         if not processingAalt and aaltHolding:
318 |             glyphRecords = self._processLookups(glyphRecords, aaltHolding, processingAalt=True, logger=logger)
319 |         return glyphRecords
320 | 
321 |     def _processLookup(self, processed, glyphRecords, lookup, featureTag, logger=None):
322 |         performedAction = False
323 |         for subtable in lookup.SubTable:
324 |             if logger:
325 |                 logger.logSubTableStart(lookup, subtable)
326 |                 logger.logInput(processed, glyphRecords)
327 |             processed, glyphRecords, performedAction = subtable.process(processed, glyphRecords, featureTag)
328 |             if logger:
329 |                 if performedAction:
330 |                     logger.logOutput(processed, glyphRecords)
331 |                 logger.logSubTableEnd()
332 |             if performedAction:
333 |                 break
334 |         return processed, glyphRecords, performedAction
335 | 
336 | 
337 | class GSUB(BaseTable):
338 |     """GSUB table object; BaseTable.loadFromFontTools builds its lookups with GSUBLookupList."""
339 |     _LookupListClass = GSUBLookupList
340 | 
341 | 
342 | class GPOS(BaseTable):
343 |     """GPOS table object; BaseTable.loadFromFontTools builds its lookups with GPOSLookupList."""
344 |     _LookupListClass = GPOSLookupList
345 | 
346 | 
347 | class GDEF(object):
348 | 
349 |     def __init__(self):
350 |         self.GlyphClassDef = None
351 |         self.MarkAttachClassDef = None
352 |         self.AttachList = None
353 |         self.LigCaretList = None
354 | 
355 |     def loadFromFontTools(self, table):
356 |         table = table.table
357 |         if table.GlyphClassDef is not None:
358 |             self.GlyphClassDef = GlyphClassDef().loadFromFontTools(table.GlyphClassDef)
359 |         if table.AttachList is not None:
360 |             self.AttachList = AttachList().loadFromFontTools(table.AttachList)
361 |         if table.LigCaretList is not None:
362 |             self.LigCaretList = LigCaretList().loadFromFontTools(table.LigCaretList)
363 |         if table.MarkAttachClassDef is not None:
364 |             self.MarkAttachClassDef = MarkAttachClassDef().loadFromFontTools(table.MarkAttachClassDef)
365 |         return self
366 | 
367 | 
368 | class AttachList(object):
369 | 
370 |     """
371 |     Deviation from spec:
372 |     - GlyphCount attribute is not implemented.
373 |     """
374 | 
375 |     __slots__ = ["AttachPoint", "Coverage"]
376 | 
377 |     def __init__(self):
378 |         self.Coverage = None
379 |         self.AttachPoint = []
380 | 
381 |     def loadFromFontTools(self, attachList):
382 |         self.Coverage = Coverage().loadFromFontTools(attachList.Coverage)
383 |         for attachPoint in attachList.AttachPoint:
384 |             attachPoint = AttachPoint().loadFromFontTools(attachPoint)
385 |             self.AttachPoint.append(attachPoint)
386 |         return self
387 | 
388 | 
389 | class AttachPoint(object):
390 | 
391 |     """
392 |     Deviation from spec:
393 |     - PointCount attribute is not implemented.
394 |     """
395 | 
396 |     __slots__ = ["PointIndex"]
397 | 
398 |     def __init__(self):
399 |         self.PointIndex = []
400 | 
401 |     def loadFromFontTools(self, attachPoint):
402 |         self.PointIndex = list(attachPoint.PointIndex)  # copied into a new list
403 |         return self  # returned so that construction and loading can be chained
404 | 
405 | 
406 | class LigCaretList(object):
407 | 
408 |     """
409 |     Deviation from spec:
410 |     - LigGlyphCount attribute is not implemented.
411 |     """
412 | 
413 |     __slots__ = ["LigGlyph", "Coverage"]
414 | 
415 |     def __init__(self):
416 |         self.LigGlyph = []
417 |         self.Coverage = None
418 | 
419 |     def loadFromFontTools(self, ligCaretList):
420 |         self.Coverage = Coverage().loadFromFontTools(ligCaretList.Coverage)
421 |         for ligGlyph in ligCaretList.LigGlyph:
422 |             ligGlyph = LigGlyph().loadFromFontTools(ligGlyph)
423 |             self.LigGlyph.append(ligGlyph)
424 |         return self
425 | 
426 | 
427 | class LigGlyph(object):
428 | 
429 |     """
430 |     Deviation from spec:
431 |     - CaretValueCount attribute is not implemented.
432 |     """
433 | 
434 |     __slots__ = ["CaretValue"]
435 | 
436 |     def __init__(self):
437 |         self.CaretValue = []
438 | 
439 |     def loadFromFontTools(self, ligGlyph):
440 |         for caretValue in ligGlyph.CaretValue:
441 |             format = caretValue.Format
442 |             if format == 1:
443 |                 caretValue = CaretValueFormat1().loadFromFontTools(caretValue)
444 |             elif format == 2:
445 |                 caretValue = CaretValueFormat2().loadFromFontTools(caretValue)
446 |             else:
447 |                 caretValue = CaretValueFormat3().loadFromFontTools(caretValue)
448 |             self.CaretValue.append(caretValue)
449 |         return self
450 | 
451 | 
452 | class CaretValueFormat1(object):
453 |     """Caret value that stores a single Coordinate (CaretValueFormat is 1)."""
454 |     __slots__ = ["CaretValueFormat", "Coordinate"]
455 | 
456 |     def __init__(self):
457 |         self.CaretValueFormat = 1
458 |         self.Coordinate = None
459 | 
460 |     def loadFromFontTools(self, caretValue):
461 |         self.Coordinate = caretValue.Coordinate  # the only value copied from the source object
462 |         return self
463 | 
464 | 
465 | class CaretValueFormat2(object):
466 |     """Caret value that stores a single CaretValuePoint (CaretValueFormat is 2)."""
467 |     __slots__ = ["CaretValueFormat", "CaretValuePoint"]
468 | 
469 |     def __init__(self):
470 |         self.CaretValueFormat = 2
471 |         self.CaretValuePoint = None
472 | 
473 |     def loadFromFontTools(self, caretValue):
474 |         self.CaretValuePoint = caretValue.CaretValuePoint  # the only value copied from the source object
475 |         return self
476 | 
477 | 
478 | class CaretValueFormat3(CaretValueFormat1):
479 | 
480 |     """
481 |     Deviation from spec:
482 |     - DeviceTable attribute is not implemented.
483 |     """
484 | 
485 |     __slots__ = ["CaretValueFormat", "Coordinate", "DeviceTable"]
486 | 
487 |     def __init__(self):
488 |         super(CaretValueFormat3, self).__init__()
489 |         self.DeviceTable = None
490 | 


--------------------------------------------------------------------------------
/Lib/compositor/textUtilities.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import unicodedata
  3 | from compositor.cmap import reverseCMAP
  4 | from compositor.caseConversionMaps import lowerToSingleUpper, upperToSingleLower, specialCasing, softDotted
  5 | from compositor.wordBreakProperties import wordBreakProperties
  6 | 
  7 | # ---------------
  8 | # Case Conversion
  9 | # ---------------
 10 | 
 11 | def convertCase(case, glyphNames, cmap, reversedCMAP, language=None, fallbackGlyph=".notdef"):
 12 |     """
 13 |     Case Conversion Function
 14 | 
 15 |     This function converts a list of glyph names to their
 16 |     upper or lowercase forms following the Unicode locale
 17 |     specific case conversion rules.
 18 | 
 19 |     Arguments:
 20 |     - case
 21 |       The case to convert to. Valid values are "upper" and "lower".
 22 |     - glyphNames
 23 |       A list of glyph names.
 24 |     - cmap
 25 |       The CMAP for the font formatted as a dictionary.
 26 |     - reversedCMAP
 27 |       Reversed version of cmap.
 28 |     - language
 29 |       The language tag being processed. May be None.
 30 |     - fallbackGlyph
 31 |       The glyph name that should be used when the converted
 32 |       glyph does not exist in the font.
 33 |     """
 34 |     # before anything else happens, the glyph names
 35 |     # have to be converted to unicode values. if no
 36 |     # unicode value is available, the glyph name is used.
 37 |     glyphs = []
 38 |     for glyphName in glyphNames:
 39 |         uniValue = reversedCMAP.get(glyphName)
 40 |         if uniValue is None:
 41 |             glyphs.append(glyphName)
 42 |         else:
 43 |             glyphs.append(uniValue[0])
 44 |     converted = []
 45 |     for index, uniValue in enumerate(glyphs):
 46 |         # glyph name indicating that there is no available unicode
 47 |         if isinstance(uniValue, str):
 48 |             converted.append(uniValue)
 49 |             continue
 50 |         ## special casing
 51 |         # specific language
 52 |         if language is not None:
 53 |             madeChange = _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, language)
 54 |             if madeChange:
 55 |                 continue
 56 |         # no specific language required
 57 |         madeChange = _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, None)
 58 |         if madeChange:
 59 |             continue
 60 |         ## single casing
 61 |         if case == "upper":
 62 |             d = lowerToSingleUpper
 63 |         else:
 64 |             d = upperToSingleLower
 65 |         if uniValue in d:
 66 |             converted.append(d[uniValue])
 67 |             continue
 68 |         ## fallback
 69 |         converted.append(uniValue)
 70 |     # convert back to glyph names
 71 |     glyphNames = []
 72 |     for uniValue in converted:
 73 |         if isinstance(uniValue, str):
 74 |             glyphNames.append(uniValue)
 75 |             continue
 76 |         glyphNames.append(cmap.get(uniValue, fallbackGlyph))
 77 |     return glyphNames
 78 | 
 79 | def convertCodeToInt(code):
 80 |     if not code:
 81 |         return None
 82 |     if " " in code:
 83 |         return tuple([convertCodeToInt(i) for i in code.split(" ")])
 84 |     return int(code, 16)
 85 | 
 86 | def _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, language):
 87 |     """
 88 |     Handle a language specific lookup.
 89 |     Returns a boolean indicating if a change was made.
 90 |     """
 91 |     if language not in specialCasing:
 92 |         return False
 93 |     languageMap = specialCasing[language]
 94 |     if uniValue in languageMap:
 95 |         contextMatch = True
 96 |         context = languageMap[uniValue]["context"]
 97 |         if context:
 98 |             contextMatch = False
 99 |             ## After_I
100 |             # The last preceding base character was
101 |             # an uppercase I, and there is no inter-
102 |             # vening combining character class 230.
103 |             if context == "After_I":
104 |                 previous = None
105 |                 for otherUniValue in reversed(glyphs[:index]):
106 |                     previous = otherUniValue
107 |                     if isinstance(otherUniValue, str):
108 |                         break
109 |                     combining = unicodedata.combining(chr(otherUniValue))
110 |                     if combining == 230:
111 |                         previous = None
112 |                         break
113 |                     if combining == 0:
114 |                         break
115 |                 if previous == convertCodeToInt("0049"):
116 |                     contextMatch = True
117 |             elif context == "Not_After_I":
118 |                 # not referenced in SpecialCasing
119 |                 raise NotImplementedError
120 |             ## After_Soft_Dotted
121 |             # The last preceding character with a
122 |             # combining class of zero before C was
123 |             # Soft_Dotted, and there is no interven-
124 |             # ing combining character class 230
125 |             elif context == "After_Soft_Dotted":
126 |                 previous = None
127 |                 for otherUniValue in reversed(glyphs[:index]):
128 |                     previous = otherUniValue
129 |                     if isinstance(otherUniValue, str):
130 |                         break
131 |                     combining = unicodedata.combining(chr(otherUniValue))
132 |                     if combining == 230:
133 |                         previous = None
134 |                         break
135 |                     if combining == 0:
136 |                         break
137 |                 if previous in softDotted:
138 |                     contextMatch = True
139 |             elif context == "Not_After_Soft_Dotted":
140 |                 # not referenced in SpecialCasing
141 |                 raise NotImplementedError
142 |             ## More_Above
143 |             # C is followed by one or more charac-
144 |             # ters of combining class 230 (ABOVE)
145 |             # in the combining character sequence.
146 |             elif context == "More_Above":
147 |                 next = None
148 |                 for otherUniValue in glyphs[index+1:]:
149 |                     next = otherUniValue
150 |                     if isinstance(otherUniValue, str):
151 |                         break
152 |                     combining = unicodedata.combining(chr(otherUniValue))
153 |                     if combining == 230:
154 |                         contextMatch = True
155 |                         break
156 |                     else:
157 |                         break
158 |             elif context == "Not_More_Above":
159 |                 # not referenced in SpecialCasing
160 |                 raise NotImplementedError
161 |             ## Before_Dot
162 |             # C is followed by U+0307 combining
163 |             # dot above. Any sequence of charac-
164 |             # ters with a combining class that is nei-
165 |             # ther 0 nor 230 may intervene between
166 |             # the current character and the com-
167 |             # bining dot above.
168 |             elif context == "Before_Dot":
169 |                 # not referenced in SpecialCasing
170 |                 raise NotImplementedError
171 |             elif context == "Not_Before_Dot":
172 |                 next = None
173 |                 contextMatch = True
174 |                 for otherUniValue in glyphs[index+1:]:
175 |                     if isinstance(otherUniValue, str):
176 |                         break
177 |                     if otherUniValue == convertCodeToInt("0307"):
178 |                         contextMatch = False
179 |                         break
180 |                     else:
181 |                         combining = unicodedata.combining(chr(otherUniValue))
182 |                         if combining == 0 or combining == 230:
183 |                             break
184 |             ## Final_Sigma
185 |             # Within the closest word boundaries
186 |             # containing C, there is a cased letter
187 |             # before C, and there is no cased letter
188 |             # after C.
189 |             elif context == "Final_Sigma":
190 |                 glyphNames = [cmap.get(i, i) for i in glyphs]
191 |                 if isWordBreakAfter(glyphNames, index, reversedCMAP):
192 |                     contextMatch = True
193 |             ## Unknown
194 |             else:
195 |                 raise NotImplementedError(context)
196 |         if contextMatch:
197 |             conversion = languageMap[uniValue][case]
198 |             # if the conversion is None, it means that the character should be removed.
199 |             if conversion is None:
200 |                 return True
201 |             # apply the conversion to the list of converted characters.
202 |             if not isinstance(conversion, tuple):
203 |                 conversion = [conversion]
204 |             for code in conversion:
205 |                 converted.append(code)
206 |             return True
207 |     return False
208 | 
209 | # -----------------------
210 | # Word Boundary Detection
211 | # -----------------------
212 | # This implements the default word boundary algorithm explained here:
213 | # http://www.unicode.org/reports/tr29/tr29-11.html#Default_Word_Boundaries
214 | 
# Sequences inside which isWordBreakBefore must not report a break.
# Two-item entries are (previous, current). Three-item entries are
# matched against (previous, current, next) and against
# (two back, previous, current). The first entry holds unicode
# values, all others hold word break property names.
_notBreakBefore = {
    # Do not break within CRLF (U+000D CARRIAGE RETURN, U+000A LINE FEED).
    (0x000D, 0x000A),
    # Do not break between most letters.
    ("ALetter", "ALetter"),
    # Do not break across certain punctuation.
    ("ALetter", "MidLetter", "ALetter"),
    # Do not break within sequences of digits, or digits adjacent to letters.
    ("Numeric", "Numeric"),
    ("Numeric", "ALetter"),
    ("ALetter", "Numeric"),
    # Do not break within sequences, such as "3.2" or "3,456.789".
    ("Numeric", "MidNum", "Numeric"),
    # Do not break between Katakana.
    ("Katakana", "Katakana"),
    # Do not break from extenders.
    ("ALetter", "ExtendNumLet"),
    ("Numeric", "ExtendNumLet"),
    ("Katakana", "ExtendNumLet"),
    ("ExtendNumLet", "ExtendNumLet"),
    # Do not break after extenders either: the position between
    # two glyphs is the same boundary whichever side names it.
    ("ExtendNumLet", "ALetter"),
    ("ExtendNumLet", "Numeric"),
    ("ExtendNumLet", "Katakana"),
}
236 | 
def isWordBreakBefore(glyphNames, index, reversedCMAP):
    """
    Tell whether the position in front of glyphNames[index]
    can be considered a word break.

    reversedCMAP maps glyph names to sequences of unicode
    values; only the first value of each is consulted.
    Returns a boolean.
    """
    # the start of the line is always a break
    if index == 0:
        return True

    def lookup(position):
        # (unicode value, word break property) of one glyph.
        # both are None for a glyph without a unicode value.
        value = reversedCMAP.get(glyphNames[position], [None])[0]
        return value, wordBreakProperties.get(value)

    currentValue, currentProperty = lookup(index)
    previousValue, previousProperty = lookup(index - 1)
    # False / None stand in for neighbours that do not exist;
    # neither can be part of an entry in _notBreakBefore.
    twoBackProperty = lookup(index - 2)[1] if index > 1 else False
    followingProperty = lookup(index + 1)[1] if index < len(glyphNames) - 1 else None
    candidates = (
        # previous and current unicode values
        (previousValue, currentValue),
        # previous and current word break properties
        (previousProperty, currentProperty),
        # previous, current and next word break properties
        (previousProperty, currentProperty, followingProperty),
        # two back, previous and current word break properties
        (twoBackProperty, previousProperty, currentProperty),
    )
    # Otherwise, break everywhere (including around ideographs).
    return not any(candidate in _notBreakBefore for candidate in candidates)
277 | 
# Sequences inside which isWordBreakAfter must not report a break.
# Two-item entries are (current, next). Three-item entries are
# matched against (previous, current, next) and against
# (current, next, two ahead). The first entry holds unicode
# values, all others hold word break property names.
_notBreakAfter = {
    # Do not break within CRLF (U+000D CARRIAGE RETURN, U+000A LINE FEED).
    (0x000D, 0x000A),
    # Do not break between most letters.
    ("ALetter", "ALetter"),
    # Do not break across certain punctuation.
    ("ALetter", "MidLetter", "ALetter"),
    # Do not break within sequences of digits, or digits adjacent to letters.
    ("Numeric", "Numeric"),
    ("Numeric", "ALetter"),
    ("ALetter", "Numeric"),
    # Do not break within sequences, such as "3.2" or "3,456.789".
    ("Numeric", "MidNum", "Numeric"),
    # Do not break between Katakana.
    ("Katakana", "Katakana"),
    # Do not break from extenders.
    ("ExtendNumLet", "ALetter"),
    ("ExtendNumLet", "Numeric"),
    ("ExtendNumLet", "Katakana"),
    # Do not break before extenders either: the position between
    # two glyphs is the same boundary whichever side names it.
    ("ALetter", "ExtendNumLet"),
    ("Numeric", "ExtendNumLet"),
    ("Katakana", "ExtendNumLet"),
    ("ExtendNumLet", "ExtendNumLet"),
}
298 | 
def isWordBreakAfter(glyphNames, index, reversedCMAP):
    """
    Tell whether the position following glyphNames[index]
    can be considered a word break.

    reversedCMAP maps glyph names to sequences of unicode
    values; only the first value of each is consulted.
    Returns a boolean.
    """
    glyphCount = len(glyphNames)
    # the end of the line is always a break
    if index == glyphCount - 1:
        return True

    def lookup(position):
        # (unicode value, word break property) of one glyph.
        # both are None for a glyph without a unicode value.
        value = reversedCMAP.get(glyphNames[position], [None])[0]
        return value, wordBreakProperties.get(value)

    currentValue, currentProperty = lookup(index)
    followingValue, followingProperty = lookup(index + 1)
    # None stands in for neighbours that do not exist
    previousProperty = lookup(index - 1)[1] if index > 0 else None
    twoAheadProperty = lookup(index + 2)[1] if index < glyphCount - 2 else None
    candidates = (
        # current and next unicode values
        (currentValue, followingValue),
        # current and next word break properties
        (currentProperty, followingProperty),
        # previous, current and next word break properties
        (previousProperty, currentProperty, followingProperty),
        # current and next two word break properties
        (currentProperty, followingProperty, twoAheadProperty),
    )
    # Otherwise, break everywhere (including around ideographs).
    return not any(candidate in _notBreakAfter for candidate in candidates)
339 | 
340 | # -----
341 | # Tests
342 | # -----
343 | 
344 | # Case Conversion
345 | 
def testCaseConversionSimple():
    """
    Doctest: simple upper casing. A glyph name that has no
    unicode value in the cmap is passed through unchanged.

    >>> cmap = {convertCodeToInt("0041") : "A",
    ...         convertCodeToInt("0061") : "a"
    ...         }
    >>> convertCase("upper", ["a", "a.alt"], cmap, reverseCMAP(cmap), None)
    ['A', 'a.alt']
    """
354 | 
def testCaseConversionSimpleMissing():
    """
    Doctest: when the converted unicode value has no glyph
    in the cmap, the fallback glyph name is returned.

    >>> cmap = {convertCodeToInt("0061") : "a"}
    >>> convertCase("upper", ["a"], cmap, reverseCMAP(cmap), None)
    ['.notdef']
    """
361 | 
def testCaseConversionLowerAfterI():
    """
    Doctest: lower casing "I" followed by "dotabove" with
    the "TRK" language tag gives a single "i".

    >>> cmap = {convertCodeToInt("0049") : "I",
    ...         convertCodeToInt("0069") : "i",
    ...         convertCodeToInt("0307") : "dotabove",
    ...         convertCodeToInt("0300") : "grave"
    ...         }
    >>> convertCase("lower", ["I", "dotabove"], cmap, reverseCMAP(cmap), "TRK")
    ['i']
    """
372 | 
def testCaseConversionUpperAfterSoftDotted():
    """
    Doctest: upper casing with the "LTH" language tag. The
    "dotabove" directly after "i" is removed; it is kept
    when "grave" sits between the two.

    >>> cmap = {convertCodeToInt("0049") : "I",
    ...         convertCodeToInt("0069") : "i",
    ...         convertCodeToInt("0307") : "dotabove",
    ...         convertCodeToInt("0300") : "grave"
    ...         }
    >>> convertCase("upper", ["i", "dotabove"], cmap, reverseCMAP(cmap), "LTH")
    ['I']
    >>> convertCase("upper", ["i", "grave", "dotabove"], cmap, reverseCMAP(cmap), "LTH")
    ['I', 'grave', 'dotabove']
    """
385 | 
def testCaseConversionLowerMoreAbove():
    """
    Doctest: lower casing with the "LTH" language tag. An
    "I" directly followed by "grave" becomes "i" plus
    "dotabove"; an "I" without a following mark becomes "i".

    >>> cmap = {convertCodeToInt("0049") : "I",
    ...         convertCodeToInt("0069") : "i",
    ...         convertCodeToInt("0307") : "dotabove",
    ...         convertCodeToInt("0300") : "grave"
    ...         }
    >>> convertCase("lower", ["I", "grave"], cmap, reverseCMAP(cmap), "LTH")
    ['i', 'dotabove', 'grave']
    >>> convertCase("lower", ["I", "I", "grave"], cmap, reverseCMAP(cmap), "LTH")
    ['i', 'i', 'dotabove', 'grave']
    >>> convertCase("lower", ["I", "I"], cmap, reverseCMAP(cmap), "LTH")
    ['i', 'i']
    """
400 | 
def testCaseConversionLowerNotBeforeDot():
    """
    Doctest: lower casing with the "TRK" language tag. An
    "I" that is not followed by "dotabove" becomes
    "dotlessi"; otherwise it becomes "i" and the
    "dotabove" is dropped, also across a "cedilla".

    >>> cmap = {convertCodeToInt("0049") : "I",
    ...         convertCodeToInt("0069") : "i",
    ...         convertCodeToInt("0307") : "dotabove",
    ...         convertCodeToInt("0131") : "dotlessi",
    ...         convertCodeToInt("0327") : "cedilla"
    ...         }
    >>> convertCase("lower", ["I"], cmap, reverseCMAP(cmap), "TRK")
    ['dotlessi']
    >>> convertCase("lower", ["I", "dotabove"], cmap, reverseCMAP(cmap), "TRK")
    ['i']
    >>> convertCase("lower", ["I", "cedilla", "dotabove"], cmap, reverseCMAP(cmap), "TRK")
    ['i', 'cedilla']
    """
416 | 
def testCaseConversionFinalSigma():
    """
    Doctest: lower casing "Sigma" without a language tag.
    The "Sigma" at the end of the line or in front of
    "space" becomes "finalsigma"; the other becomes "sigma".

    >>> cmap = {convertCodeToInt("03A3") : "Sigma",
    ...         convertCodeToInt("03C3") : "sigma",
    ...         convertCodeToInt("03C2") : "finalsigma",
    ...         convertCodeToInt("0020") : "space",
    ...         }
    >>> convertCase("lower", ["Sigma", "Sigma"], cmap, reverseCMAP(cmap))
    ['sigma', 'finalsigma']
    >>> convertCase("lower", ["Sigma", "Sigma", "space"], cmap, reverseCMAP(cmap))
    ['sigma', 'finalsigma', 'space']
    """
429 | 
430 | # Word Boundaries
431 | 
def testBreakBefore():
    """
    Doctest: isWordBreakBefore, one section per rule.
    Each section comment names the word break properties
    of the glyph sequence under test.

    >>> cmap = {convertCodeToInt("0020") : "space",
    ...         convertCodeToInt("0041") : "A",
    ...         convertCodeToInt("002E") : "period",
    ...         convertCodeToInt("003A") : "colon",
    ...         convertCodeToInt("005F") : "underscore",
    ...         convertCodeToInt("0031") : "one",
    ...         convertCodeToInt("31F0") : "ku",
    ...         }
    >>> cmap = reverseCMAP(cmap)

    # Start of line
    >>> isWordBreakBefore(["A", "A"], 0, cmap)
    True

    # ALetter, ALetter
    >>> isWordBreakBefore(["space", "A", "A"], 1, cmap)
    True
    >>> isWordBreakBefore(["space", "A", "A"], 2, cmap)
    False

    # ALetter, MidLetter, ALetter
    >>> isWordBreakBefore(["A", "colon", "A"], 1, cmap)
    False
    >>> isWordBreakBefore(["A", "colon", "A"], 2, cmap)
    False
    >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 1, cmap)
    False
    >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 2, cmap)
    False
    >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 3, cmap)
    False
    >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 4, cmap)
    False

    # Numeric, Numeric
    >>> isWordBreakBefore(["space", "one", "one"], 1, cmap)
    True
    >>> isWordBreakBefore(["space", "one", "one"], 2, cmap)
    False

    # ALetter, Numeric
    >>> isWordBreakBefore(["space", "A", "one"], 1, cmap)
    True
    >>> isWordBreakBefore(["space", "A", "one"], 2, cmap)
    False

    # Numeric, ALetter
    >>> isWordBreakBefore(["space", "one", "A"], 1, cmap)
    True
    >>> isWordBreakBefore(["space", "one", "A"], 2, cmap)
    False

    # Numeric, MidNum, Numeric
    >>> isWordBreakBefore(["one", "period", "one"], 1, cmap)
    False
    >>> isWordBreakBefore(["one", "period", "one"], 2, cmap)
    False

    # Katakana, Katakana
    >>> isWordBreakBefore(["space", "ku", "ku"], 1, cmap)
    True
    >>> isWordBreakBefore(["space", "ku", "ku"], 2, cmap)
    False

    # ALetter, ExtendNumLet
    >>> isWordBreakBefore(["A", "underscore"], 1, cmap)
    False

    # Numeric, ExtendNumLet
    >>> isWordBreakBefore(["one", "underscore"], 1, cmap)
    False

    # Katakana, ExtendNumLet
    >>> isWordBreakBefore(["ku", "underscore"], 1, cmap)
    False

    # ExtendNumLet, ExtendNumLet
    >>> isWordBreakBefore(["underscore", "underscore"], 1, cmap)
    False
    """
514 | 
def testBreakAfter():
    """
    Doctest: isWordBreakAfter, one section per rule.
    Each section comment names the word break properties
    of the glyph sequence under test.

    >>> cmap = {convertCodeToInt("0020") : "space",
    ...         convertCodeToInt("0041") : "A",
    ...         convertCodeToInt("002E") : "period",
    ...         convertCodeToInt("003A") : "colon",
    ...         convertCodeToInt("005F") : "underscore",
    ...         convertCodeToInt("0031") : "one",
    ...         convertCodeToInt("31F0") : "ku",
    ...         }
    >>> cmap = reverseCMAP(cmap)

    # End of line
    >>> isWordBreakAfter(["A", "A"], 1, cmap)
    True

    # ALetter, ALetter
    >>> isWordBreakAfter(["A", "A", "space"], 0, cmap)
    False
    >>> isWordBreakAfter(["A", "A", "space"], 1, cmap)
    True

    # ALetter, MidLetter, ALetter
    >>> isWordBreakAfter(["A", "colon", "A"], 0, cmap)
    False
    >>> isWordBreakAfter(["A", "colon", "A"], 1, cmap)
    False
    >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 0, cmap)
    False
    >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 1, cmap)
    False
    >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 2, cmap)
    False
    >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 3, cmap)
    False

    # Numeric, Numeric
    >>> isWordBreakAfter(["one", "one", "space"], 0, cmap)
    False
    >>> isWordBreakAfter(["one", "one", "space"], 1, cmap)
    True

    # ALetter, Numeric
    >>> isWordBreakAfter(["A", "one", "space"], 0, cmap)
    False
    >>> isWordBreakAfter(["A", "one", "space"], 1, cmap)
    True

    # Numeric, ALetter
    >>> isWordBreakAfter(["one", "A", "space"], 0, cmap)
    False
    >>> isWordBreakAfter(["one", "A", "space"], 1, cmap)
    True

    # Numeric, MidNum, Numeric
    >>> isWordBreakAfter(["one", "period", "one"], 0, cmap)
    False
    >>> isWordBreakAfter(["one", "period", "one"], 1, cmap)
    False
    >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 0, cmap)
    False
    >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 1, cmap)
    False
    >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 2, cmap)
    False
    >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 3, cmap)
    False

    # Katakana, Katakana
    >>> isWordBreakAfter(["ku", "ku", "space"], 0, cmap)
    False
    >>> isWordBreakAfter(["ku", "ku", "space"], 1, cmap)
    True

    # ExtendNumLet, ALetter
    >>> isWordBreakAfter(["underscore", "A"], 0, cmap)
    False

    # ExtendNumLet, Numeric
    >>> isWordBreakAfter(["underscore", "one"], 0, cmap)
    False

    # ExtendNumLet, Katakana
    >>> isWordBreakAfter(["underscore", "ku"], 0, cmap)
    False
    """
601 | 
if __name__ == "__main__":
    # run the doctests of this module when it is executed as a script
    from doctest import testmod
    testmod()
605 | 


--------------------------------------------------------------------------------
/License.txt:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2006-2009 Type Supply LLC
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include License.txt
2 | include Install.txt
3 | include demo.py
4 | include demo.html
5 | include development.txt
6 | include usage.html
7 | include usage.txt
8 | prune Lib/compositor/test
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | compositor
  2 | ==========
  3 | 
  4 | A basic OpenType GSUB and GPOS layout engine written in Python.
  5 | 
  6 | 
  7 | Table of Contents
  8 | -----------------
  9 | 
 10 | - [Usage Reference](#usage-reference)
 11 |     - [Assumptions](#assumptions)
 12 |     - [The Font Object](#the-font-object)
 13 |     - [The GlyphRecord Object](#the-glyphrecord-object)
 14 |     - [The Glyph Object](#the-glyph-object)
 15 |     - [The Info Object](#the-info-object)
 16 | - [Development](#development)
 17 | - [Installation](#installation)
 18 | 
 19 | 
 20 | - - -
 21 | 
 22 | 
 23 | Usage Reference
 24 | ---------------
 25 | 
 26 | This document covers the basic usage of the compositor package. For more detailed information read the documentation strings in the source.
 27 | 
 28 | ### Assumptions
 29 | 
 30 | Some assumptions about the OpenType fonts being used are made by the package:
 31 | 
 32 | * The font is valid.
 33 | * The font's `cmap` table contains Platform 3 Encoding 1.
 34 | * The font does not contain `GSUB` or `GPOS` lookup types that are not supported by the GSUB or GPOS objects. If an unsupported lookup type is present, the lookup will simply be ignored. It will not raise an error.
 35 | 
 36 | ### The Font Object
 37 | 
 38 | #### Importing
 39 | 
 40 | ```python
 41 | from compositor import Font
 42 | ```
 43 | 
 44 | #### Construction
 45 | 
 46 | ```python
 47 | font = Font(path)
 48 | ```
 49 | 
 50 | <dl>
 51 |   <dt>path
 52 |   <dd>A path to an OpenType font.
 53 | </dl>
 54 | 
 55 | #### Special Behavior
 56 | 
 57 | ```python
 58 | glyph = font["aGlyphName"]
 59 | ```
 60 | 
 61 | Returns the glyph object named `aGlyphName`. This will raise a `KeyError` if `aGlyphName` is not in the font.
 62 | 
 63 | ```python
 64 | isThere = "aGlyphName" in font
 65 | ```
 66 | 
 67 | Returns a boolean representing if `aGlyphName` is in the font.
 68 | 
 69 | #### Methods
 70 | 
 71 | ```python
 72 | font.keys()
 73 | ```
 74 | 
 75 | A list of all glyph names in the font.
 76 | 
 77 | ```python
 78 | glyphRecords = font.process(aString)
 79 | ```
 80 | 
 81 | This is the most important method. It takes a string (Unicode or plain ASCII) and processes it with the features defined in the font's `GSUB` and `GPOS` tables. A list of `GlyphRecord` objects will be returned.
 82 | 
 83 | ```python
 84 | featureTags = font.getFeatureList()
 85 | ```
 86 | 
 87 | A list of all available features in GSUB and GPOS.
 88 | 
 89 | ```python
 90 | state = font.getFeatureState(featureTag)
 91 | ```
 92 | 
 93 | Get a boolean representing if a feature is on or not. This assumes that the feature state is consistent in both the GSUB and GPOS tables. A `CompositorError` will be raised if the feature is inconsistently applied. A `CompositorError` will be raised if featureTag is not defined in GSUB or GPOS.
 94 | 
 95 | ```python
 96 | font.setFeatureState(featureTag, state)
 97 | ```
 98 | 
 99 | Set the application state of a feature.
100 | 
101 | #### Attributes
102 | 
103 | <dl>
104 |   <dt>info
105 |   <dd>The Info object for the font.
106 | </dl>
107 | 
108 | ### The GlyphRecord Object
109 | 
110 | #### Attributes
111 | 
112 | <dl>
113 | 
114 |   <dt>glyphName
115 |   <dd>The name of the referenced glyph.
116 | 
117 |   <dt>xPlacement
118 |   <dd>Horizontal placement.
119 | 
120 |   <dt>yPlacement
121 |   <dd>Vertical placement.
122 | 
123 |   <dt>xAdvance
124 |   <dd>Horizontal adjustment for advance.
125 | 
126 |   <dt>yAdvance
127 |   <dd>Vertical adjustment for advance.
128 | 
129 |   <dt>alternates
130 |   <dd>A list of `GlyphRecords` indicating alternates for the glyph.
131 | 
132 | </dl>
133 | 
134 | ### The Glyph Object
135 | 
136 | #### Methods
137 | 
138 | ```python
139 | glyph.draw(pen)
140 | ```
141 | 
142 | Draws the glyph with a FontTools pen.
143 | 
144 | #### Attributes
145 | 
146 | <dl>
147 | 
148 |   <dt>name
149 |   <dd>The name of the glyph.
150 | 
151 |   <dt>index
152 |   <dd>The glyph's index within the source font.
153 | 
154 |   <dt>width
155 |   <dd>The width of the glyph.
156 | 
157 |   <dt>bounds
158 |   <dd>The bounding box for the glyph. Formatted as `(xMin, yMin, xMax, yMax)`. If the glyph contains no outlines, this will return `None`.
159 | 
160 | </dl>
161 | 
162 | ### The Info Object
163 | 
164 | #### Attributes
165 | 
166 | - familyName
167 | - styleName
168 | - unitsPerEm
169 | - ascender
170 | - descender
171 | 
172 | 
173 | Development
174 | -----------
175 | 
176 | ### Relationship to the GSUB and GPOS Specification
177 | 
178 | The Compositor GSUB and GPOS tables adhere as closely as possible to the GSUB and GPOS specification. Every effort has been made to keep terminology consistent. All known deviations from the spec are documented. (The deviations are generally trivial. For example, most of the subtables don't implement the `Count` attributes. This is done because the Python iterator provides a more convenient and faster way to deal with iteration than creating a range. Therefore, the `Count` objects are not needed.)
179 | 
180 | ### Object Loading
181 | 
182 | For performance reasons, when a new font is loaded, all of the GSUB and GPOS data is extracted from the font with fontTools. The data is placed into compositor objects. These objects are then used to process text. This initial loading can be relatively expensive, but the processing speed of the objects is worth the initial expense.
183 | 
184 | 
185 | Installation
186 | ------------
187 | 
188 | To install this package, type the following in the command line:
189 | 
190 | ```
191 | python setup.py install
192 | ```
193 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | from AppKit import *
 2 | from fontTools.pens.cocoaPen import CocoaPen
 3 | from compositor import Font
 4 | 
 5 | # a simple function that implements path caching
 6 | def getCachedNSBezierPath(glyph, font):
 7 |     if not hasattr(glyph, "nsBezierPath"):
 8 |         pen = CocoaPen(font)
 9 |         glyph.draw(pen)
10 |         glyph.nsBezierPath = pen.path
11 |     return glyph.nsBezierPath
12 | 
# a path to a font
# (aPathToYourFont is a placeholder: replace it with the
# path to an OpenType font before running the demo)
fontPath = aPathToYourFont

# a path to save the image to
imagePath = "demo.tiff"

# setup the layout engine
font = Font(fontPath)

# turn the aalt feature on so that we get any alternates
font.setFeatureState("aalt", True)

# process some text
glyphRecords = font.process(u"HERE IS SOME TEXT!")

# calculate the image size
pointSize = 50.0
offset = 20
scale = pointSize / font.info.unitsPerEm
imageWidth = sum(font[record.glyphName].width + record.xAdvance for record in glyphRecords) * scale
imageWidth = int(round(imageWidth))
imageWidth += offset * 2
imageHeight = pointSize + (offset * 2)

# setup the image
image = NSImage.alloc().initWithSize_((imageWidth, imageHeight))
image.lockFocus()
# fill it with white
NSColor.whiteColor().set()
NSRectFill(((0, 0), (imageWidth, imageHeight)))
# offset and set the scale
transform = NSAffineTransform.transform()
transform.translateXBy_yBy_(offset, offset)
transform.scaleBy_(scale)
transform.translateXBy_yBy_(0, abs(font.info.descender))
transform.concat()
# iterate over the glyph records
for record in glyphRecords:
    glyph = font[record.glyphName]
    # shift for x and y placement
    transform = NSAffineTransform.transform()
    transform.translateXBy_yBy_(record.xPlacement, record.yPlacement)
    transform.concat()
    # if alternates are present, switch the color
    if record.alternates:
        NSColor.redColor().set()
    # otherwise, set the color to black
    else:
        NSColor.blackColor().set()
    # get a NSBezierPath for the glyph and fill it
    path = getCachedNSBezierPath(glyph, font)
    path.fill()
    # shift for the next glyph. the transforms accumulate, so both
    # placement offsets have to be undone here: placement moves only
    # this glyph, the advance alone positions the glyphs that follow.
    transform = NSAffineTransform.transform()
    transform.translateXBy_yBy_(record.xAdvance + glyph.width - record.xPlacement, -record.yPlacement)
    transform.concat()
# release the image
image.unlockFocus()
# write the image to disk
tiff = image.TIFFRepresentation()
tiff.writeToFile_atomically_(imagePath, False)
74 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from __future__ import print_function
 4 | from setuptools import setup
 5 | 
 6 | try:
 7 |     import fontTools
 8 | except:
 9 |     print("*** Warning: defcon requires FontTools, see:")
10 |     print("    https://github.com/fonttools/fonttools")
11 | 
12 | 
13 | 
14 | setup(
15 |     name="compositor",
16 |     version="0.2b",
17 |     description="A simple OpenType GSUB and GPOS engine.",
18 |     author="Tal Leming",
19 |     author_email="tal@typesupply.com",
20 |     maintainer="Just van Rossum, Frederik Berlaen",
21 |     maintainer_email="justvanrossum@gmail.com",
22 |     url="https://github.com/robotools/compositor",
23 |     license="MIT",
24 |     packages=["compositor"],
25 |     package_dir={"":"Lib"}
26 | )
27 | 


--------------------------------------------------------------------------------
/todo.txt:
--------------------------------------------------------------------------------
 1 | - the GlyphRecord object is getting messy. Look into
 2 |   consolidating all the various private reference attributes.
 3 | - the various private contextual attrs should be moved to properties.
 4 | - make the various Count attributes properties.
 5 | 
 6 | 
 7 | Not yet supported:
 8 | GSUB Lookup Type 8 (Reverse Chaining Context)
 9 | LookupFlag.RightToLeft
10 | 
11 | Skipped:
12 | - should the glyph width be calculated into the glyph record's
13 |   x-advance before returning it to the caller?
14 |   - no. that is the responsibility of a renderer.
15 | - need to handle featureRecord.FeatureParams if they exist (fontTools does not read these)
16 | - make rand feature support smarter. look back through the
17 |   processed glyphs and find the alternate that has occurred
18 |   least frequently and farthest away. (this was skipped because
19 |   the rand feature is not such a good idea to begin with)
20 | 


--------------------------------------------------------------------------------
/tools/SpecialCasing.txt:
--------------------------------------------------------------------------------
  1 | # SpecialCasing-5.0.0.txt
  2 | # Date: 2006-03-03, 08:23:36 GMT [MD]
  3 | #
  4 | # Unicode Character Database
  5 | # Copyright (c) 1991-2006 Unicode, Inc.
  6 | # For terms of use, see http://www.unicode.org/terms_of_use.html
  7 | # For documentation, see UCD.html
  8 | #
  9 | # Special Casing Properties
 10 | #
 11 | # This file is a supplement to the UnicodeData file.
 12 | # It contains additional information about the casing of Unicode characters.
 13 | # (For compatibility, the UnicodeData.txt file only contains case mappings for
 14 | # characters where they are 1-1, and does not have locale-specific mappings.)
 15 | # For more information, see the discussion of Case Mappings in the Unicode Standard.
 16 | #
 17 | # All code points not listed in this file that do not have a simple case mappings
 18 | # in UnicodeData.txt map to themselves.
 19 | # ================================================================================
 20 | # Format
 21 | # ================================================================================
 22 | # The entries in this file are in the following machine-readable format:
 23 | #
 24 | # <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? # <comment>
 25 | #
 26 | # <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more
 27 | # than one character, they are separated by spaces. Other than as used to separate 
 28 | # elements, spaces are to be ignored.
 29 | #
 30 | # The <condition_list> is optional. Where present, it consists of one or more locale IDs
 31 | # or contexts, separated by spaces. In these conditions:
 32 | # - A condition list overrides the normal behavior if all of the listed conditions are true.
 33 | # - The context is always the context of the characters in the original string,
 34 | #   NOT in the resulting string.
 35 | # - Case distinctions in the condition list are not significant.
 36 | # - Conditions preceded by "Not_" represent the negation of the condition.
 37 | #
 38 | # A locale ID is defined by taking any language tag as defined by
 39 | # RFC 3066 (or its successor), and replacing '-' by '_'.
 40 | #
 41 | # A context for a character C is defined by Section 3.13 Default Case 
 42 | # Operations, of The Unicode Standard, Version 5.0.
 43 | # (This is identical to the context defined by Unicode 4.1.0,
 44 | #  as specified in http://www.unicode.org/versions/Unicode4.1.0/)
 45 | #
 46 | # Parsers of this file must be prepared to deal with future additions to this format:
 47 | #  * Additional contexts
 48 | #  * Additional fields
 49 | # ================================================================================
 50 | 
 51 | # ================================================================================
 52 | # Unconditional mappings
 53 | # ================================================================================
 54 | 
 55 | # The German es-zed is special--the normal mapping is to SS.
 56 | # Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
 57 | 
 58 | 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
 59 | 
 60 | # Preserve canonical equivalence for I with dot. Turkic is handled below.
 61 | 
 62 | 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 63 | 
 64 | # Ligatures
 65 | 
 66 | FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
 67 | FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
 68 | FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
 69 | FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
 70 | FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
 71 | FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
 72 | FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
 73 | 
 74 | 0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
 75 | FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
 76 | FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
 77 | FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
 78 | FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
 79 | FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 80 | 
 81 | # No corresponding uppercase precomposed character
 82 | 
 83 | 0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
 84 | 0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
 85 | 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
 86 | 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
 87 | 1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
 88 | 1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
 89 | 1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
 90 | 1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
 91 | 1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
 92 | 1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
 93 | 1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
 94 | 1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
 95 | 1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
 96 | 1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
 97 | 1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
 98 | 1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
 99 | 1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
100 | 1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
101 | 1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
102 | 1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
103 | 1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
104 | 1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
105 | 1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
106 | 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
107 | 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
108 | 
109 | # IMPORTANT-when capitalizing iota-subscript (0345)
110 | #  It MUST be in normalized form--moved to the end of any sequence of combining marks.
111 | #  This is because logically it represents a following base character!
112 | #  E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
113 | # It should never be the first character in a word, so in titlecasing it can be left as is.
114 | 
115 | # The following cases are already in the UnicodeData file, so are only commented here.
116 | 
117 | # 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
118 | 
119 | # All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
120 | # have special uppercases.
121 | # Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
122 | 
123 | 1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
124 | 1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
125 | 1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
126 | 1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
127 | 1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
128 | 1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
129 | 1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
130 | 1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
131 | 1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
132 | 1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
133 | 1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
134 | 1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
135 | 1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
136 | 1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
137 | 1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
138 | 1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
139 | 1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
140 | 1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
141 | 1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
142 | 1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
143 | 1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
144 | 1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
145 | 1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
146 | 1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
147 | 1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
148 | 1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
149 | 1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
150 | 1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
151 | 1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
152 | 1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
153 | 1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
154 | 1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
155 | 1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
156 | 1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
157 | 1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
158 | 1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
159 | 1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
160 | 1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
161 | 1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
162 | 1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
163 | 1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
164 | 1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
165 | 1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
166 | 1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
167 | 1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
168 | 1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
169 | 1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
170 | 1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
171 | 1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
172 | 1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
173 | 1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
174 | 1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
175 | 1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
176 | 1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
177 | 
178 | # Some characters with YPOGEGRAMMENI also have no corresponding titlecases
179 | 
180 | 1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
181 | 1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
182 | 1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
183 | 1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
184 | 1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
185 | 1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
186 | 
187 | 1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
188 | 1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
189 | 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
190 | 
191 | # ================================================================================
192 | # Conditional mappings
193 | # ================================================================================
194 | 
195 | # Special case for final form of sigma
196 | 
197 | 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
198 | 
199 | # Note: the following cases for non-final are already in the UnicodeData file.
200 | 
201 | # 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
202 | # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
203 | # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
204 | 
205 | # Note: the following cases are not included, since they would case-fold in lowercasing
206 | 
207 | # 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
208 | # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
209 | 
210 | # ================================================================================
211 | # Locale-sensitive mappings
212 | # ================================================================================
213 | 
214 | # Lithuanian
215 | 
216 | # Lithuanian retains the dot in a lowercase i when followed by accents.
217 | 
218 | # Remove DOT ABOVE after "i" with upper or titlecase
219 | 
220 | 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
221 | 
222 | # Introduce an explicit dot above when lowercasing capital I's and J's
223 | # whenever there are more accents above.
224 | # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
225 | 
226 | 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
227 | 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
228 | 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
229 | 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
230 | 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
231 | 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
232 | 
233 | # ================================================================================
234 | 
235 | # Turkish and Azeri
236 | 
237 | # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
238 | # The following rules handle those cases.
239 | 
240 | 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
241 | 0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
242 | 
243 | # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
244 | # This matches the behavior of the canonically equivalent I-dot_above
245 | 
246 | 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
247 | 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
248 | 
249 | # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
250 | 
251 | 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
252 | 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
253 | 
254 | # When uppercasing, i turns into a dotted capital I
255 | 
256 | 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
257 | 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
258 | 
259 | # Note: the following case is already in the UnicodeData file.
260 | 
261 | # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
262 | 
263 | # EOF
264 | 
265 | 


--------------------------------------------------------------------------------
/tools/UnicodeReferenceGenerator.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This script generates the caseConversionMaps.py and wordBreakProperties.py modules.
  3 | It references the following Unicode files:
  4 |     PropList.txt
  5 |     SpecialCasing.txt
  6 |     UnicodeData.txt
  7 |     WordBreakProperty.txt
  8 | """
  9 | 
 10 | import os
 11 | import pprint
 12 | import time
 13 | import compositor
 14 | 
 15 | # -----
 16 | # Tools
 17 | # -----
 18 | 
 19 | def filterLines(path):
 20 |     f = open(path, "r")
 21 |     text = f.read()
 22 |     f.close()
 23 |     lines = []
 24 |     for line in text.splitlines():
 25 |         line = line.strip()
 26 |         if not line or line.startswith("#"):
 27 |             continue
 28 |         lines.append(line)
 29 |     return lines
 30 | 
 31 | def convertCodeToInt(code):
 32 |     if not code:
 33 |         return None
 34 |     if " " in code:
 35 |         return tuple([convertCodeToInt(i) for i in code.split(" ")])
 36 |     return int(code, 16)
 37 | 
 38 | # ----------------------
 39 | # wordBreakProperties.py
 40 | # ----------------------
 41 | 
 42 | p = os.path.join(os.path.dirname(__file__), "WordBreakProperty.txt")
 43 | 
 44 | wordBreakProperties = {}
 45 | 
 46 | for line in filterLines(p):
 47 |     data = line.split(";")
 48 |     data = [i.split("#")[0].strip() for i in data]
 49 |     code, property = data
 50 |     if ".." in code:
 51 |         start, end = code.split("..")
 52 |         start = convertCodeToInt(start)
 53 |         end = convertCodeToInt(end)
 54 |         codes = range(start, end+1)
 55 |     else:
 56 |         codes = [convertCodeToInt(code)]
 57 |     for code in codes:
 58 |         assert code not in wordBreakProperties
 59 |         wordBreakProperties[code] = property
 60 | 
 61 | # Write the module
 62 | 
 63 | text = [
 64 |     "# ------------",
 65 |     "# DO NOT EDIT!",
 66 |     "# ------------",
 67 |     "# This was generated by tools/%s" % os.path.basename(__file__),
 68 |     "# Generated on: %s" % (time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())),
 69 |     "",
 70 | ]
 71 | 
 72 | text.append("%s = %s" % ("wordBreakProperties", pprint.pformat(wordBreakProperties, indent=4)))
 73 | 
 74 | text = "\n".join(text)
 75 | 
 76 | path = os.path.join(os.path.dirname(compositor.__file__), "wordBreakProperties.py")
 77 | 
 78 | f = open(path, "w")
 79 | f.write(text)
 80 | f.close()
 81 | 
 82 | # ---------------------
 83 | # caseConversionMaps.py
 84 | # ---------------------
 85 | 
 86 | # Simple Casing
 87 | 
 88 | lowerToSingleUpper = {}
 89 | upperToSingleLower = {}
 90 | 
 91 | p = os.path.join(os.path.dirname(__file__), "UnicodeData.txt")
 92 | 
 93 | for line in filterLines(p):
 94 |     data = line.split(";")
 95 |     code = convertCodeToInt(data[0])
 96 |     upper = data[12]
 97 |     lower = data[13]
 98 |     name = data[1]
 99 |     if upper:
100 |         lowerToSingleUpper[code] = convertCodeToInt(upper)
101 |     if lower:
102 |         upperToSingleLower[code] = convertCodeToInt(lower)
103 | 
104 | # Special Casing
105 | 
p = os.path.join(os.path.dirname(__file__),  "SpecialCasing.txt")

# locale IDs used in SpecialCasing.txt and the tags they are stored under
languageTags = dict(lt="LTH", tr="TRK", az="AZE")

# language tag (or None) -> code point -> dict with context, upper and lower
specialCasing = {}

for line in filterLines(p):
    data = [field.strip() for field in line.split(";")]
    code = convertCodeToInt(data[0])
    lower = convertCodeToInt(data[1])
    upper = convertCodeToInt(data[3])
    language = context = None
    if line.count(";") != 4:
        # a fifth ";" means a condition field sits before the comment
        language, name = data[4], data[5]
        if " " in language:
            language, context = language.split(" ")
        if language == "Final_Sigma":
            # Final_Sigma is stored as a context with no language
            context, language = language, None
        else:
            language = languageTags[language]
    else:
        name = data[4]
    specialCasing.setdefault(language, {})[code] = dict(context=context, upper=upper, lower=lower)
138 | 
139 | # Property List
140 | 
# code points with the Soft_Dotted property; only the keys carry
# information, every value is None
softDotted = {}

p = os.path.join(os.path.dirname(__file__),  "PropList.txt")

for line in filterLines(p):
    code, prop = [field.strip() for field in line.split(";")]
    prop = prop.split("#")[0].strip()
    # pick the dict to store into; other properties have none and are skipped
    s = softDotted if prop == "Soft_Dotted" else None
    if s is None:
        continue
    if ".." not in code:
        codes = [convertCodeToInt(code)]
    else:
        # "start..end" covers every code point in the range, including the end
        start, end = [convertCodeToInt(bound) for bound in code.split("..")]
        codes = range(start, end+1)
    s.update(dict.fromkeys(codes))
162 | 
163 | # Write the module
164 | 
# the module is written next to the installed compositor package
path = os.path.join(os.path.dirname(compositor.__file__), "caseConversionMaps.py")

text = [
    "# ------------",
    "# DO NOT EDIT!",
    "# ------------",
    "# This was generated by tools/%s" % os.path.basename(__file__),
    "# Generated on: %s" % (time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())),
    "",
]

# (name, dict) pairs, written in this order
toWrite = [
    ("lowerToSingleUpper", lowerToSingleUpper),
    ("upperToSingleLower", upperToSingleLower),
    ("specialCasing", specialCasing),
    ("softDotted", softDotted)
]

for name, d in toWrite:
    # each dict becomes a "name = {...}" assignment followed by an empty line
    s = "%s = %s" % (name, pprint.pformat(d, indent=4))
    text.append(s)
    text.append("")

text = "\n".join(text)

# the with statement closes the file even if writing fails
with open(path, "w") as f:
    f.write(text)
193 | 


--------------------------------------------------------------------------------
/tools/WordBreakProperty.txt:
--------------------------------------------------------------------------------
  1 | # WordBreakProperty-5.0.0.txt
  2 | # Date: 2006-06-07, 23:23:03 GMT [MD]
  3 | #
  4 | # Unicode Character Database
  5 | # Copyright (c) 1991-2006 Unicode, Inc.
  6 | # For terms of use, see http://www.unicode.org/terms_of_use.html
  7 | # For documentation, see UCD.html
  8 | 
  9 | # ================================================
 10 | 
 11 | # Property:	Word_Break
 12 | 
 13 | #  All code points not explicitly listed for Word_Break
 14 | #  have the value Other (XX).
 15 | 
 16 | # @missing: 0000..10FFFF; Other
 17 | 
 18 | # ================================================
 19 | 
 20 | 00AD          ; Format # Cf       SOFT HYPHEN
 21 | 0600..0603    ; Format # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
 22 | 06DD          ; Format # Cf       ARABIC END OF AYAH
 23 | 070F          ; Format # Cf       SYRIAC ABBREVIATION MARK
 24 | 17B4..17B5    ; Format # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
 25 | 200B          ; Format # Cf       ZERO WIDTH SPACE
 26 | 200E..200F    ; Format # Cf   [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
 27 | 202A..202E    ; Format # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
 28 | 2060..2063    ; Format # Cf   [4] WORD JOINER..INVISIBLE SEPARATOR
 29 | 206A..206F    ; Format # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
 30 | FEFF          ; Format # Cf       ZERO WIDTH NO-BREAK SPACE
 31 | FFF9..FFFB    ; Format # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
 32 | 1D173..1D17A  ; Format # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
 33 | E0001         ; Format # Cf       LANGUAGE TAG
 34 | E0020..E007F  ; Format # Cf  [96] TAG SPACE..CANCEL TAG
 35 | 
 36 | # Total code points: 136
 37 | 
 38 | # ================================================
 39 | 
 40 | 3031..3035    ; Katakana # Lm   [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
 41 | 309B..309C    ; Katakana # Sk   [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
 42 | 30A0          ; Katakana # Pd       KATAKANA-HIRAGANA DOUBLE HYPHEN
 43 | 30A1..30FA    ; Katakana # Lo  [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
 44 | 30FC..30FE    ; Katakana # Lm   [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
 45 | 30FF          ; Katakana # Lo       KATAKANA DIGRAPH KOTO
 46 | 31F0..31FF    ; Katakana # Lo  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
 47 | FF66..FF6F    ; Katakana # Lo  [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
 48 | FF70          ; Katakana # Lm       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
 49 | FF71..FF9D    ; Katakana # Lo  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
 50 | FF9E..FF9F    ; Katakana # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
 51 | 
 52 | # Total code points: 176
 53 | 
 54 | # ================================================
 55 | 
 56 | 0041..005A    ; ALetter # L&  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
 57 | 0061..007A    ; ALetter # L&  [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
 58 | 00AA          ; ALetter # L&       FEMININE ORDINAL INDICATOR
 59 | 00B5          ; ALetter # L&       MICRO SIGN
 60 | 00BA          ; ALetter # L&       MASCULINE ORDINAL INDICATOR
 61 | 00C0..00D6    ; ALetter # L&  [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
 62 | 00D8..00F6    ; ALetter # L&  [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
 63 | 00F8..01BA    ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
 64 | 01BB          ; ALetter # Lo       LATIN LETTER TWO WITH STROKE
 65 | 01BC..01BF    ; ALetter # L&   [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
 66 | 01C0..01C3    ; ALetter # Lo   [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
 67 | 01C4..0293    ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
 68 | 0294          ; ALetter # Lo       LATIN LETTER GLOTTAL STOP
 69 | 0295..02AF    ; ALetter # L&  [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
 70 | 02B0..02C1    ; ALetter # Lm  [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
 71 | 02C6..02D1    ; ALetter # Lm  [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
 72 | 02E0..02E4    ; ALetter # Lm   [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
 73 | 02EE          ; ALetter # Lm       MODIFIER LETTER DOUBLE APOSTROPHE
 74 | 037A          ; ALetter # Lm       GREEK YPOGEGRAMMENI
 75 | 037B..037D    ; ALetter # L&   [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
 76 | 0386          ; ALetter # L&       GREEK CAPITAL LETTER ALPHA WITH TONOS
 77 | 0388..038A    ; ALetter # L&   [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
 78 | 038C          ; ALetter # L&       GREEK CAPITAL LETTER OMICRON WITH TONOS
 79 | 038E..03A1    ; ALetter # L&  [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
 80 | 03A3..03CE    ; ALetter # L&  [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS
 81 | 03D0..03F5    ; ALetter # L&  [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
 82 | 03F7..0481    ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
 83 | 048A..0513    ; ALetter # L& [138] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH HOOK
 84 | 0531..0556    ; ALetter # L&  [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
 85 | 0559          ; ALetter # Lm       ARMENIAN MODIFIER LETTER LEFT HALF RING
 86 | 0561..0587    ; ALetter # L&  [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
 87 | 05D0..05EA    ; ALetter # Lo  [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
 88 | 05F0..05F2    ; ALetter # Lo   [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
 89 | 05F3          ; ALetter # Po       HEBREW PUNCTUATION GERESH
 90 | 0621..063A    ; ALetter # Lo  [26] ARABIC LETTER HAMZA..ARABIC LETTER GHAIN
 91 | 0640          ; ALetter # Lm       ARABIC TATWEEL
 92 | 0641..064A    ; ALetter # Lo  [10] ARABIC LETTER FEH..ARABIC LETTER YEH
 93 | 066E..066F    ; ALetter # Lo   [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
 94 | 0671..06D3    ; ALetter # Lo  [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
 95 | 06D5          ; ALetter # Lo       ARABIC LETTER AE
 96 | 06E5..06E6    ; ALetter # Lm   [2] ARABIC SMALL WAW..ARABIC SMALL YEH
 97 | 06EE..06EF    ; ALetter # Lo   [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
 98 | 06FA..06FC    ; ALetter # Lo   [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
 99 | 06FF          ; ALetter # Lo       ARABIC LETTER HEH WITH INVERTED V
100 | 0710          ; ALetter # Lo       SYRIAC LETTER ALAPH
101 | 0712..072F    ; ALetter # Lo  [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
102 | 074D..076D    ; ALetter # Lo  [33] SYRIAC LETTER SOGDIAN ZHAIN..ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE
103 | 0780..07A5    ; ALetter # Lo  [38] THAANA LETTER HAA..THAANA LETTER WAAVU
104 | 07B1          ; ALetter # Lo       THAANA LETTER NAA
105 | 07CA..07EA    ; ALetter # Lo  [33] NKO LETTER A..NKO LETTER JONA RA
106 | 07F4..07F5    ; ALetter # Lm   [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
107 | 07FA          ; ALetter # Lm       NKO LAJANYALAN
108 | 0903          ; ALetter # Mc       DEVANAGARI SIGN VISARGA
109 | 0904..0939    ; ALetter # Lo  [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
110 | 093D          ; ALetter # Lo       DEVANAGARI SIGN AVAGRAHA
111 | 093E..0940    ; ALetter # Mc   [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
112 | 0949..094C    ; ALetter # Mc   [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
113 | 0950          ; ALetter # Lo       DEVANAGARI OM
114 | 0958..0961    ; ALetter # Lo  [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
115 | 097B..097F    ; ALetter # Lo   [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
116 | 0982..0983    ; ALetter # Mc   [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
117 | 0985..098C    ; ALetter # Lo   [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
118 | 098F..0990    ; ALetter # Lo   [2] BENGALI LETTER E..BENGALI LETTER AI
119 | 0993..09A8    ; ALetter # Lo  [22] BENGALI LETTER O..BENGALI LETTER NA
120 | 09AA..09B0    ; ALetter # Lo   [7] BENGALI LETTER PA..BENGALI LETTER RA
121 | 09B2          ; ALetter # Lo       BENGALI LETTER LA
122 | 09B6..09B9    ; ALetter # Lo   [4] BENGALI LETTER SHA..BENGALI LETTER HA
123 | 09BD          ; ALetter # Lo       BENGALI SIGN AVAGRAHA
124 | 09BF..09C0    ; ALetter # Mc   [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
125 | 09C7..09C8    ; ALetter # Mc   [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
126 | 09CB..09CC    ; ALetter # Mc   [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
127 | 09CE          ; ALetter # Lo       BENGALI LETTER KHANDA TA
128 | 09DC..09DD    ; ALetter # Lo   [2] BENGALI LETTER RRA..BENGALI LETTER RHA
129 | 09DF..09E1    ; ALetter # Lo   [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
130 | 09F0..09F1    ; ALetter # Lo   [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
131 | 0A03          ; ALetter # Mc       GURMUKHI SIGN VISARGA
132 | 0A05..0A0A    ; ALetter # Lo   [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
133 | 0A0F..0A10    ; ALetter # Lo   [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
134 | 0A13..0A28    ; ALetter # Lo  [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
135 | 0A2A..0A30    ; ALetter # Lo   [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
136 | 0A32..0A33    ; ALetter # Lo   [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
137 | 0A35..0A36    ; ALetter # Lo   [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
138 | 0A38..0A39    ; ALetter # Lo   [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
139 | 0A3E..0A40    ; ALetter # Mc   [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
140 | 0A59..0A5C    ; ALetter # Lo   [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
141 | 0A5E          ; ALetter # Lo       GURMUKHI LETTER FA
142 | 0A72..0A74    ; ALetter # Lo   [3] GURMUKHI IRI..GURMUKHI EK ONKAR
143 | 0A83          ; ALetter # Mc       GUJARATI SIGN VISARGA
144 | 0A85..0A8D    ; ALetter # Lo   [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
145 | 0A8F..0A91    ; ALetter # Lo   [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
146 | 0A93..0AA8    ; ALetter # Lo  [22] GUJARATI LETTER O..GUJARATI LETTER NA
147 | 0AAA..0AB0    ; ALetter # Lo   [7] GUJARATI LETTER PA..GUJARATI LETTER RA
148 | 0AB2..0AB3    ; ALetter # Lo   [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
149 | 0AB5..0AB9    ; ALetter # Lo   [5] GUJARATI LETTER VA..GUJARATI LETTER HA
150 | 0ABD          ; ALetter # Lo       GUJARATI SIGN AVAGRAHA
151 | 0ABE..0AC0    ; ALetter # Mc   [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
152 | 0AC9          ; ALetter # Mc       GUJARATI VOWEL SIGN CANDRA O
153 | 0ACB..0ACC    ; ALetter # Mc   [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
154 | 0AD0          ; ALetter # Lo       GUJARATI OM
155 | 0AE0..0AE1    ; ALetter # Lo   [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
156 | 0B02..0B03    ; ALetter # Mc   [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
157 | 0B05..0B0C    ; ALetter # Lo   [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
158 | 0B0F..0B10    ; ALetter # Lo   [2] ORIYA LETTER E..ORIYA LETTER AI
159 | 0B13..0B28    ; ALetter # Lo  [22] ORIYA LETTER O..ORIYA LETTER NA
160 | 0B2A..0B30    ; ALetter # Lo   [7] ORIYA LETTER PA..ORIYA LETTER RA
161 | 0B32..0B33    ; ALetter # Lo   [2] ORIYA LETTER LA..ORIYA LETTER LLA
162 | 0B35..0B39    ; ALetter # Lo   [5] ORIYA LETTER VA..ORIYA LETTER HA
163 | 0B3D          ; ALetter # Lo       ORIYA SIGN AVAGRAHA
164 | 0B40          ; ALetter # Mc       ORIYA VOWEL SIGN II
165 | 0B47..0B48    ; ALetter # Mc   [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
166 | 0B4B..0B4C    ; ALetter # Mc   [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
167 | 0B5C..0B5D    ; ALetter # Lo   [2] ORIYA LETTER RRA..ORIYA LETTER RHA
168 | 0B5F..0B61    ; ALetter # Lo   [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
169 | 0B71          ; ALetter # Lo       ORIYA LETTER WA
170 | 0B83          ; ALetter # Lo       TAMIL SIGN VISARGA
171 | 0B85..0B8A    ; ALetter # Lo   [6] TAMIL LETTER A..TAMIL LETTER UU
172 | 0B8E..0B90    ; ALetter # Lo   [3] TAMIL LETTER E..TAMIL LETTER AI
173 | 0B92..0B95    ; ALetter # Lo   [4] TAMIL LETTER O..TAMIL LETTER KA
174 | 0B99..0B9A    ; ALetter # Lo   [2] TAMIL LETTER NGA..TAMIL LETTER CA
175 | 0B9C          ; ALetter # Lo       TAMIL LETTER JA
176 | 0B9E..0B9F    ; ALetter # Lo   [2] TAMIL LETTER NYA..TAMIL LETTER TTA
177 | 0BA3..0BA4    ; ALetter # Lo   [2] TAMIL LETTER NNA..TAMIL LETTER TA
178 | 0BA8..0BAA    ; ALetter # Lo   [3] TAMIL LETTER NA..TAMIL LETTER PA
179 | 0BAE..0BB9    ; ALetter # Lo  [12] TAMIL LETTER MA..TAMIL LETTER HA
180 | 0BBF          ; ALetter # Mc       TAMIL VOWEL SIGN I
181 | 0BC1..0BC2    ; ALetter # Mc   [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
182 | 0BC6..0BC8    ; ALetter # Mc   [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
183 | 0BCA..0BCC    ; ALetter # Mc   [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
184 | 0C01..0C03    ; ALetter # Mc   [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
185 | 0C05..0C0C    ; ALetter # Lo   [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
186 | 0C0E..0C10    ; ALetter # Lo   [3] TELUGU LETTER E..TELUGU LETTER AI
187 | 0C12..0C28    ; ALetter # Lo  [23] TELUGU LETTER O..TELUGU LETTER NA
188 | 0C2A..0C33    ; ALetter # Lo  [10] TELUGU LETTER PA..TELUGU LETTER LLA
189 | 0C35..0C39    ; ALetter # Lo   [5] TELUGU LETTER VA..TELUGU LETTER HA
190 | 0C41..0C44    ; ALetter # Mc   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
191 | 0C60..0C61    ; ALetter # Lo   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
192 | 0C82..0C83    ; ALetter # Mc   [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
193 | 0C85..0C8C    ; ALetter # Lo   [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
194 | 0C8E..0C90    ; ALetter # Lo   [3] KANNADA LETTER E..KANNADA LETTER AI
195 | 0C92..0CA8    ; ALetter # Lo  [23] KANNADA LETTER O..KANNADA LETTER NA
196 | 0CAA..0CB3    ; ALetter # Lo  [10] KANNADA LETTER PA..KANNADA LETTER LLA
197 | 0CB5..0CB9    ; ALetter # Lo   [5] KANNADA LETTER VA..KANNADA LETTER HA
198 | 0CBD          ; ALetter # Lo       KANNADA SIGN AVAGRAHA
199 | 0CBE          ; ALetter # Mc       KANNADA VOWEL SIGN AA
200 | 0CC0..0CC1    ; ALetter # Mc   [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
201 | 0CC3..0CC4    ; ALetter # Mc   [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
202 | 0CC7..0CC8    ; ALetter # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
203 | 0CCA..0CCB    ; ALetter # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
204 | 0CDE          ; ALetter # Lo       KANNADA LETTER FA
205 | 0CE0..0CE1    ; ALetter # Lo   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
206 | 0D02..0D03    ; ALetter # Mc   [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
207 | 0D05..0D0C    ; ALetter # Lo   [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
208 | 0D0E..0D10    ; ALetter # Lo   [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
209 | 0D12..0D28    ; ALetter # Lo  [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
210 | 0D2A..0D39    ; ALetter # Lo  [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
211 | 0D3F..0D40    ; ALetter # Mc   [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II
212 | 0D46..0D48    ; ALetter # Mc   [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
213 | 0D4A..0D4C    ; ALetter # Mc   [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
214 | 0D60..0D61    ; ALetter # Lo   [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
215 | 0D82..0D83    ; ALetter # Mc   [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
216 | 0D85..0D96    ; ALetter # Lo  [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
217 | 0D9A..0DB1    ; ALetter # Lo  [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
218 | 0DB3..0DBB    ; ALetter # Lo   [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
219 | 0DBD          ; ALetter # Lo       SINHALA LETTER DANTAJA LAYANNA
220 | 0DC0..0DC6    ; ALetter # Lo   [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
221 | 0DD0..0DD1    ; ALetter # Mc   [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
222 | 0DD8..0DDE    ; ALetter # Mc   [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
223 | 0DF2..0DF3    ; ALetter # Mc   [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
224 | 0F00          ; ALetter # Lo       TIBETAN SYLLABLE OM
225 | 0F40..0F47    ; ALetter # Lo   [8] TIBETAN LETTER KA..TIBETAN LETTER JA
226 | 0F49..0F6A    ; ALetter # Lo  [34] TIBETAN LETTER NYA..TIBETAN LETTER FIXED-FORM RA
227 | 0F7F          ; ALetter # Mc       TIBETAN SIGN RNAM BCAD
228 | 0F88..0F8B    ; ALetter # Lo   [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
229 | 10A0..10C5    ; ALetter # L&  [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
230 | 10D0..10FA    ; ALetter # Lo  [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
231 | 10FC          ; ALetter # Lm       MODIFIER LETTER GEORGIAN NAR
232 | 1100..1159    ; ALetter # Lo  [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
233 | 115F..11A2    ; ALetter # Lo  [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
234 | 11A8..11F9    ; ALetter # Lo  [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
235 | 1200..1248    ; ALetter # Lo  [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
236 | 124A..124D    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
237 | 1250..1256    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
238 | 1258          ; ALetter # Lo       ETHIOPIC SYLLABLE QHWA
239 | 125A..125D    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
240 | 1260..1288    ; ALetter # Lo  [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
241 | 128A..128D    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
242 | 1290..12B0    ; ALetter # Lo  [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
243 | 12B2..12B5    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
244 | 12B8..12BE    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
245 | 12C0          ; ALetter # Lo       ETHIOPIC SYLLABLE KXWA
246 | 12C2..12C5    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
247 | 12C8..12D6    ; ALetter # Lo  [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
248 | 12D8..1310    ; ALetter # Lo  [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
249 | 1312..1315    ; ALetter # Lo   [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
250 | 1318..135A    ; ALetter # Lo  [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
251 | 1380..138F    ; ALetter # Lo  [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
252 | 13A0..13F4    ; ALetter # Lo  [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
253 | 1401..166C    ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
254 | 166F..1676    ; ALetter # Lo   [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
255 | 1681..169A    ; ALetter # Lo  [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
256 | 16A0..16EA    ; ALetter # Lo  [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
257 | 16EE..16F0    ; ALetter # Nl   [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
258 | 1700..170C    ; ALetter # Lo  [13] TAGALOG LETTER A..TAGALOG LETTER YA
259 | 170E..1711    ; ALetter # Lo   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
260 | 1720..1731    ; ALetter # Lo  [18] HANUNOO LETTER A..HANUNOO LETTER HA
261 | 1740..1751    ; ALetter # Lo  [18] BUHID LETTER A..BUHID LETTER HA
262 | 1760..176C    ; ALetter # Lo  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
263 | 176E..1770    ; ALetter # Lo   [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
264 | 1820..1842    ; ALetter # Lo  [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
265 | 1843          ; ALetter # Lm       MONGOLIAN LETTER TODO LONG VOWEL SIGN
266 | 1844..1877    ; ALetter # Lo  [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
267 | 1880..18A8    ; ALetter # Lo  [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
268 | 1900..191C    ; ALetter # Lo  [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
269 | 1923..1926    ; ALetter # Mc   [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
270 | 1929..192B    ; ALetter # Mc   [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
271 | 1930..1931    ; ALetter # Mc   [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
272 | 1933..1938    ; ALetter # Mc   [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
273 | 1A00..1A16    ; ALetter # Lo  [23] BUGINESE LETTER KA..BUGINESE LETTER HA
274 | 1A19..1A1B    ; ALetter # Mc   [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
275 | 1B04          ; ALetter # Mc       BALINESE SIGN BISAH
276 | 1B05..1B33    ; ALetter # Lo  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
277 | 1B35          ; ALetter # Mc       BALINESE VOWEL SIGN TEDUNG
278 | 1B3B          ; ALetter # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
279 | 1B3D..1B41    ; ALetter # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
280 | 1B43          ; ALetter # Mc       BALINESE VOWEL SIGN PEPET TEDUNG
281 | 1B45..1B4B    ; ALetter # Lo   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
282 | 1D00..1D2B    ; ALetter # L&  [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
283 | 1D2C..1D61    ; ALetter # Lm  [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
284 | 1D62..1D77    ; ALetter # L&  [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
285 | 1D78          ; ALetter # Lm       MODIFIER LETTER CYRILLIC EN
286 | 1D79..1D9A    ; ALetter # L&  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
287 | 1D9B..1DBF    ; ALetter # Lm  [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
288 | 1E00..1E9B    ; ALetter # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE
289 | 1EA0..1EF9    ; ALetter # L&  [90] LATIN CAPITAL LETTER A WITH DOT BELOW..LATIN SMALL LETTER Y WITH TILDE
290 | 1F00..1F15    ; ALetter # L&  [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
291 | 1F18..1F1D    ; ALetter # L&   [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
292 | 1F20..1F45    ; ALetter # L&  [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
293 | 1F48..1F4D    ; ALetter # L&   [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
294 | 1F50..1F57    ; ALetter # L&   [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
295 | 1F59          ; ALetter # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA
296 | 1F5B          ; ALetter # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
297 | 1F5D          ; ALetter # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
298 | 1F5F..1F7D    ; ALetter # L&  [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
299 | 1F80..1FB4    ; ALetter # L&  [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
300 | 1FB6..1FBC    ; ALetter # L&   [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
301 | 1FBE          ; ALetter # L&       GREEK PROSGEGRAMMENI
302 | 1FC2..1FC4    ; ALetter # L&   [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
303 | 1FC6..1FCC    ; ALetter # L&   [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
304 | 1FD0..1FD3    ; ALetter # L&   [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
305 | 1FD6..1FDB    ; ALetter # L&   [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
306 | 1FE0..1FEC    ; ALetter # L&  [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
307 | 1FF2..1FF4    ; ALetter # L&   [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
308 | 1FF6..1FFC    ; ALetter # L&   [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
309 | 2071          ; ALetter # L&       SUPERSCRIPT LATIN SMALL LETTER I
310 | 207F          ; ALetter # L&       SUPERSCRIPT LATIN SMALL LETTER N
311 | 2090..2094    ; ALetter # Lm   [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
312 | 2102          ; ALetter # L&       DOUBLE-STRUCK CAPITAL C
313 | 2107          ; ALetter # L&       EULER CONSTANT
314 | 210A..2113    ; ALetter # L&  [10] SCRIPT SMALL G..SCRIPT SMALL L
315 | 2115          ; ALetter # L&       DOUBLE-STRUCK CAPITAL N
316 | 2119..211D    ; ALetter # L&   [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
317 | 2124          ; ALetter # L&       DOUBLE-STRUCK CAPITAL Z
318 | 2126          ; ALetter # L&       OHM SIGN
319 | 2128          ; ALetter # L&       BLACK-LETTER CAPITAL Z
320 | 212A..212D    ; ALetter # L&   [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
321 | 212F..2134    ; ALetter # L&   [6] SCRIPT SMALL E..SCRIPT SMALL O
322 | 2135..2138    ; ALetter # Lo   [4] ALEF SYMBOL..DALET SYMBOL
323 | 2139          ; ALetter # L&       INFORMATION SOURCE
324 | 213C..213F    ; ALetter # L&   [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
325 | 2145..2149    ; ALetter # L&   [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
326 | 214E          ; ALetter # L&       TURNED SMALL F
327 | 2160..2182    ; ALetter # Nl  [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
328 | 2183..2184    ; ALetter # L&   [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
329 | 24B6..24E9    ; ALetter # So  [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
330 | 2C00..2C2E    ; ALetter # L&  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
331 | 2C30..2C5E    ; ALetter # L&  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
332 | 2C60..2C6C    ; ALetter # L&  [13] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SMALL LETTER Z WITH DESCENDER
333 | 2C74..2C77    ; ALetter # L&   [4] LATIN SMALL LETTER V WITH CURL..LATIN SMALL LETTER TAILLESS PHI
334 | 2C80..2CE4    ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
335 | 2D00..2D25    ; ALetter # L&  [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
336 | 2D30..2D65    ; ALetter # Lo  [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
337 | 2D6F          ; ALetter # Lm       TIFINAGH MODIFIER LETTER LABIALIZATION MARK
338 | 2D80..2D96    ; ALetter # Lo  [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
339 | 2DA0..2DA6    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
340 | 2DA8..2DAE    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
341 | 2DB0..2DB6    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
342 | 2DB8..2DBE    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
343 | 2DC0..2DC6    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
344 | 2DC8..2DCE    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
345 | 2DD0..2DD6    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
346 | 2DD8..2DDE    ; ALetter # Lo   [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
347 | 3005          ; ALetter # Lm       IDEOGRAPHIC ITERATION MARK
348 | 303B          ; ALetter # Lm       VERTICAL IDEOGRAPHIC ITERATION MARK
349 | 303C          ; ALetter # Lo       MASU MARK
350 | 3105..312C    ; ALetter # Lo  [40] BOPOMOFO LETTER B..BOPOMOFO LETTER GN
351 | 3131..318E    ; ALetter # Lo  [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
352 | 31A0..31B7    ; ALetter # Lo  [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
353 | A000..A014    ; ALetter # Lo  [21] YI SYLLABLE IT..YI SYLLABLE E
354 | A015          ; ALetter # Lm       YI SYLLABLE WU
355 | A016..A48C    ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
356 | A717..A71A    ; ALetter # Lm   [4] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOWER RIGHT CORNER ANGLE
357 | A800..A801    ; ALetter # Lo   [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
358 | A803..A805    ; ALetter # Lo   [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
359 | A807..A80A    ; ALetter # Lo   [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
360 | A80C..A822    ; ALetter # Lo  [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
361 | A823..A824    ; ALetter # Mc   [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
362 | A827          ; ALetter # Mc       SYLOTI NAGRI VOWEL SIGN OO
363 | A840..A873    ; ALetter # Lo  [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
364 | AC00..D7A3    ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
365 | FA30..FA6A    ; ALetter # Lo  [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
366 | FB00..FB06    ; ALetter # L&   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
367 | FB13..FB17    ; ALetter # L&   [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
368 | FB1D          ; ALetter # Lo       HEBREW LETTER YOD WITH HIRIQ
369 | FB1F..FB28    ; ALetter # Lo  [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
370 | FB2A..FB36    ; ALetter # Lo  [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
371 | FB38..FB3C    ; ALetter # Lo   [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
372 | FB3E          ; ALetter # Lo       HEBREW LETTER MEM WITH DAGESH
373 | FB40..FB41    ; ALetter # Lo   [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
374 | FB43..FB44    ; ALetter # Lo   [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
375 | FB46..FBB1    ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
376 | FBD3..FD3D    ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
377 | FD50..FD8F    ; ALetter # Lo  [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
378 | FD92..FDC7    ; ALetter # Lo  [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
379 | FDF0..FDFB    ; ALetter # Lo  [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
380 | FE70..FE74    ; ALetter # Lo   [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
381 | FE76..FEFC    ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
382 | FF21..FF3A    ; ALetter # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
383 | FF41..FF5A    ; ALetter # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
384 | FFA0..FFBE    ; ALetter # Lo  [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
385 | FFC2..FFC7    ; ALetter # Lo   [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
386 | FFCA..FFCF    ; ALetter # Lo   [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
387 | FFD2..FFD7    ; ALetter # Lo   [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
388 | FFDA..FFDC    ; ALetter # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
389 | 10000..1000B  ; ALetter # Lo  [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
390 | 1000D..10026  ; ALetter # Lo  [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
391 | 10028..1003A  ; ALetter # Lo  [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
392 | 1003C..1003D  ; ALetter # Lo   [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
393 | 1003F..1004D  ; ALetter # Lo  [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
394 | 10050..1005D  ; ALetter # Lo  [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
395 | 10080..100FA  ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
396 | 10140..10174  ; ALetter # Nl  [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
397 | 10300..1031E  ; ALetter # Lo  [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
398 | 10330..10340  ; ALetter # Lo  [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
399 | 10341         ; ALetter # Nl       GOTHIC LETTER NINETY
400 | 10342..10349  ; ALetter # Lo   [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
401 | 1034A         ; ALetter # Nl       GOTHIC LETTER NINE HUNDRED
402 | 10380..1039D  ; ALetter # Lo  [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
403 | 103A0..103C3  ; ALetter # Lo  [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
404 | 103C8..103CF  ; ALetter # Lo   [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
405 | 103D1..103D5  ; ALetter # Nl   [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
406 | 10400..1044F  ; ALetter # L&  [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
407 | 10450..1049D  ; ALetter # Lo  [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
408 | 10800..10805  ; ALetter # Lo   [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
409 | 10808         ; ALetter # Lo       CYPRIOT SYLLABLE JO
410 | 1080A..10835  ; ALetter # Lo  [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
411 | 10837..10838  ; ALetter # Lo   [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
412 | 1083C         ; ALetter # Lo       CYPRIOT SYLLABLE ZA
413 | 1083F         ; ALetter # Lo       CYPRIOT SYLLABLE ZO
414 | 10900..10915  ; ALetter # Lo  [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
415 | 10A00         ; ALetter # Lo       KHAROSHTHI LETTER A
416 | 10A10..10A13  ; ALetter # Lo   [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
417 | 10A15..10A17  ; ALetter # Lo   [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
418 | 10A19..10A33  ; ALetter # Lo  [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
419 | 12000..1236E  ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
420 | 12400..12462  ; ALetter # Nl  [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
421 | 1D400..1D454  ; ALetter # L&  [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
422 | 1D456..1D49C  ; ALetter # L&  [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
423 | 1D49E..1D49F  ; ALetter # L&   [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
424 | 1D4A2         ; ALetter # L&       MATHEMATICAL SCRIPT CAPITAL G
425 | 1D4A5..1D4A6  ; ALetter # L&   [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
426 | 1D4A9..1D4AC  ; ALetter # L&   [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
427 | 1D4AE..1D4B9  ; ALetter # L&  [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
428 | 1D4BB         ; ALetter # L&       MATHEMATICAL SCRIPT SMALL F
429 | 1D4BD..1D4C3  ; ALetter # L&   [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
430 | 1D4C5..1D505  ; ALetter # L&  [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
431 | 1D507..1D50A  ; ALetter # L&   [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
432 | 1D50D..1D514  ; ALetter # L&   [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
433 | 1D516..1D51C  ; ALetter # L&   [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
434 | 1D51E..1D539  ; ALetter # L&  [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
435 | 1D53B..1D53E  ; ALetter # L&   [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
436 | 1D540..1D544  ; ALetter # L&   [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
437 | 1D546         ; ALetter # L&       MATHEMATICAL DOUBLE-STRUCK CAPITAL O
438 | 1D54A..1D550  ; ALetter # L&   [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
439 | 1D552..1D6A5  ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
440 | 1D6A8..1D6C0  ; ALetter # L&  [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
441 | 1D6C2..1D6DA  ; ALetter # L&  [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
442 | 1D6DC..1D6FA  ; ALetter # L&  [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
443 | 1D6FC..1D714  ; ALetter # L&  [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
444 | 1D716..1D734  ; ALetter # L&  [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
445 | 1D736..1D74E  ; ALetter # L&  [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
446 | 1D750..1D76E  ; ALetter # L&  [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
447 | 1D770..1D788  ; ALetter # L&  [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
448 | 1D78A..1D7A8  ; ALetter # L&  [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
449 | 1D7AA..1D7C2  ; ALetter # L&  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
450 | 1D7C4..1D7CB  ; ALetter # L&   [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
451 | 
452 | # Total code points: 21149
453 | 
454 | # ================================================
455 | 
456 | 0027          ; MidLetter # Po       APOSTROPHE
457 | 003A          ; MidLetter # Po       COLON
458 | 00B7          ; MidLetter # Po       MIDDLE DOT
459 | 05F4          ; MidLetter # Po       HEBREW PUNCTUATION GERSHAYIM
460 | 2019          ; MidLetter # Pf       RIGHT SINGLE QUOTATION MARK
461 | 2027          ; MidLetter # Po       HYPHENATION POINT
462 | 
463 | # Total code points: 6
464 | 
465 | # ================================================
466 | 
467 | 002C          ; MidNum # Po       COMMA
468 | 002E          ; MidNum # Po       FULL STOP
469 | 003B          ; MidNum # Po       SEMICOLON
470 | 037E          ; MidNum # Po       GREEK QUESTION MARK
471 | 0589          ; MidNum # Po       ARMENIAN FULL STOP
472 | 060D          ; MidNum # Po       ARABIC DATE SEPARATOR
473 | 07F8          ; MidNum # Po       NKO COMMA
474 | 2044          ; MidNum # Sm       FRACTION SLASH
475 | FE10          ; MidNum # Po       PRESENTATION FORM FOR VERTICAL COMMA
476 | FE13..FE14    ; MidNum # Po   [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON
477 | 
478 | # Total code points: 11
479 | 
480 | # ================================================
481 | 
482 | 0030..0039    ; Numeric # Nd  [10] DIGIT ZERO..DIGIT NINE
483 | 0660..0669    ; Numeric # Nd  [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
484 | 066B..066C    ; Numeric # Po   [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR
485 | 06F0..06F9    ; Numeric # Nd  [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
486 | 07C0..07C9    ; Numeric # Nd  [10] NKO DIGIT ZERO..NKO DIGIT NINE
487 | 0966..096F    ; Numeric # Nd  [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
488 | 09E6..09EF    ; Numeric # Nd  [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
489 | 0A66..0A6F    ; Numeric # Nd  [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
490 | 0AE6..0AEF    ; Numeric # Nd  [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
491 | 0B66..0B6F    ; Numeric # Nd  [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
492 | 0BE6..0BEF    ; Numeric # Nd  [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
493 | 0C66..0C6F    ; Numeric # Nd  [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
494 | 0CE6..0CEF    ; Numeric # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
495 | 0D66..0D6F    ; Numeric # Nd  [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
496 | 0E50..0E59    ; Numeric # Nd  [10] THAI DIGIT ZERO..THAI DIGIT NINE
497 | 0ED0..0ED9    ; Numeric # Nd  [10] LAO DIGIT ZERO..LAO DIGIT NINE
498 | 0F20..0F29    ; Numeric # Nd  [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
499 | 1040..1049    ; Numeric # Nd  [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
500 | 17E0..17E9    ; Numeric # Nd  [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
501 | 1810..1819    ; Numeric # Nd  [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
502 | 1946..194F    ; Numeric # Nd  [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
503 | 19D0..19D9    ; Numeric # Nd  [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
504 | 1B50..1B59    ; Numeric # Nd  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
505 | 104A0..104A9  ; Numeric # Nd  [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
506 | 1D7CE..1D7FF  ; Numeric # Nd  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
507 | 
508 | # Total code points: 282
509 | 
510 | # ================================================
511 | 
512 | 005F          ; ExtendNumLet # Pc       LOW LINE
513 | 203F..2040    ; ExtendNumLet # Pc   [2] UNDERTIE..CHARACTER TIE
514 | 2054          ; ExtendNumLet # Pc       INVERTED UNDERTIE
515 | FE33..FE34    ; ExtendNumLet # Pc   [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
516 | FE4D..FE4F    ; ExtendNumLet # Pc   [3] DASHED LOW LINE..WAVY LOW LINE
517 | FF3F          ; ExtendNumLet # Pc       FULLWIDTH LOW LINE
518 | 
519 | # Total code points: 10
520 | 
521 | # EOF
522 | 


--------------------------------------------------------------------------------