├── 8bit.go ├── ASCII.go ├── LICENSE ├── README.md ├── big5-data.go ├── big5.go ├── charset.go ├── convert_string.go ├── entity.go ├── entity_data.go ├── euc-jp.go ├── fallback.go ├── gb18030-data.go ├── gb18030.go ├── gbk-data.go ├── gbk.go ├── jis0201-data.go ├── jis0208-data.go ├── jis0212-data.go ├── mahonia_test.go ├── mahoniconv └── mahoniconv.go ├── mbcs.go ├── reader.go ├── shiftjis.go ├── translate.go ├── utf16.go ├── utf8.go └── writer.go /ASCII.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for ASCII and ISO-8859-1 4 | 5 | func init() { 6 | for i := 0; i < len(asciiCharsets); i++ { 7 | RegisterCharset(&asciiCharsets[i]) 8 | } 9 | } 10 | 11 | var asciiCharsets = []Charset{ 12 | { 13 | Name: "US-ASCII", 14 | NewDecoder: func() Decoder { return decodeASCIIRune }, 15 | NewEncoder: func() Encoder { return encodeASCIIRune }, 16 | Aliases: []string{"ASCII", "US", "ISO646-US", "IBM367", "cp367", "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "csASCII"}, 17 | }, 18 | { 19 | Name: "ISO-8859-1", 20 | NewDecoder: func() Decoder { return decodeLatin1Rune }, 21 | NewEncoder: func() Encoder { return encodeLatin1Rune }, 22 | Aliases: []string{"latin1", "ISO Latin 1", "IBM819", "cp819", "ISO_8859-1:1987", "iso-ir-100", "l1", "csISOLatin1"}, 23 | }, 24 | } 25 | 26 | func decodeASCIIRune(p []byte) (c rune, size int, status Status) { 27 | if len(p) == 0 { 28 | status = NO_ROOM 29 | return 30 | } 31 | 32 | b := p[0] 33 | if b > 127 { 34 | return 0xfffd, 1, INVALID_CHAR 35 | } 36 | return rune(b), 1, SUCCESS 37 | } 38 | 39 | func encodeASCIIRune(p []byte, c rune) (size int, status Status) { 40 | if len(p) == 0 { 41 | status = NO_ROOM 42 | return 43 | } 44 | 45 | if c < 128 { 46 | p[0] = byte(c) 47 | return 1, SUCCESS 48 | } 49 | 50 | p[0] = '?' 
51 | return 1, INVALID_CHAR 52 | } 53 | 54 | func decodeLatin1Rune(p []byte) (c rune, size int, status Status) { 55 | if len(p) == 0 { 56 | status = NO_ROOM 57 | return 58 | } 59 | 60 | return rune(p[0]), 1, SUCCESS 61 | } 62 | 63 | func encodeLatin1Rune(p []byte, c rune) (size int, status Status) { 64 | if len(p) == 0 { 65 | status = NO_ROOM 66 | return 67 | } 68 | 69 | if c < 256 { 70 | p[0] = byte(c) 71 | return 1, SUCCESS 72 | } 73 | 74 | p[0] = '?' 75 | return 1, INVALID_CHAR 76 | } 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | https://opensource.org/licenses/BSD-3-Clause 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mahonia 2 | ======= 3 | 4 | character-set conversion library implemented in Go. 5 | 6 | Mahonia is a character-set conversion library implemented in Go. 7 | All data is compiled into the executable; it doesn't need any external data files. 8 | 9 | based on http://code.google.com/p/mahonia/ 10 | 11 | install 12 | ------- 13 | 14 | go get github.com/axgle/mahonia 15 | 16 | example 17 | ------- 18 | 19 | package main 20 | import "fmt" 21 | import "github.com/axgle/mahonia" 22 | func main(){ 23 | enc:=mahonia.NewEncoder("gbk") 24 | //converts a string from UTF-8 to gbk encoding. 25 | fmt.Println(enc.ConvertString("hello,世界")) 26 | } 27 | 28 | donate 29 | ------- 30 | https://github.com/axgle/mahonia/wiki/Donate 31 | -------------------------------------------------------------------------------- /big5.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for Big 5 encoding. 
4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "Big5", 12 | Aliases: []string{"csBig5"}, 13 | NewDecoder: func() Decoder { 14 | return decodeBig5Rune 15 | }, 16 | NewEncoder: func() Encoder { 17 | big5Once.Do(reverseBig5Table) 18 | return encodeBig5Rune 19 | }, 20 | }) 21 | } 22 | 23 | func decodeBig5Rune(p []byte) (r rune, size int, status Status) { 24 | if len(p) == 0 { 25 | status = NO_ROOM 26 | return 27 | } 28 | 29 | b := p[0] 30 | if b < 128 { 31 | return rune(b), 1, SUCCESS 32 | } 33 | 34 | if len(p) < 2 { 35 | status = NO_ROOM 36 | return 37 | } 38 | 39 | c := int(p[0])<<8 + int(p[1]) 40 | c = int(big5ToUnicode[c]) 41 | if c > 0 { 42 | return rune(c), 2, SUCCESS 43 | } 44 | 45 | return 0xfffd, 1, INVALID_CHAR 46 | } 47 | 48 | func encodeBig5Rune(p []byte, r rune) (size int, status Status) { 49 | if len(p) == 0 { 50 | status = NO_ROOM 51 | return 52 | } 53 | 54 | if r < 128 { 55 | p[0] = byte(r) 56 | return 1, SUCCESS 57 | } 58 | 59 | if len(p) < 2 { 60 | status = NO_ROOM 61 | return 62 | } 63 | 64 | if r < 0x10000 { 65 | c := unicodeToBig5[r] 66 | if c > 0 { 67 | p[0] = byte(c >> 8) 68 | p[1] = byte(c) 69 | return 2, SUCCESS 70 | } 71 | } 72 | 73 | p[0] = '?' 74 | return 1, INVALID_CHAR 75 | } 76 | 77 | var big5Once sync.Once 78 | 79 | var unicodeToBig5 []uint16 80 | 81 | func reverseBig5Table() { 82 | unicodeToBig5 = make([]uint16, 65536) 83 | 84 | for big5, unicode := range big5ToUnicode { 85 | if unicode > 0 { 86 | unicodeToBig5[unicode] = uint16(big5) 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /charset.go: -------------------------------------------------------------------------------- 1 | // This package is a character-set conversion library for Go. 2 | package mahonia 3 | 4 | import ( 5 | "bytes" 6 | "unicode" 7 | ) 8 | 9 | // Status is the type for the status return value from a Decoder or Encoder. 
// The zero value is SUCCESS.
type Status int

const (
	// SUCCESS means that the character was converted with no problems.
	SUCCESS = Status(iota)

	// INVALID_CHAR means that the source contained invalid bytes, or that the character
	// could not be represented in the destination encoding.
	// The Encoder or Decoder should have output a substitute character.
	INVALID_CHAR

	// NO_ROOM means there were not enough input bytes to form a complete character,
	// or there was not enough room in the output buffer to write a complete character.
	// No bytes were written, and no internal state was changed in the Encoder or Decoder.
	NO_ROOM

	// STATE_ONLY means that bytes were read or written indicating a state transition,
	// but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
	STATE_ONLY
)

// A Decoder is a function that decodes a character set, one character at a time.
// It works much like utf8.DecodeRune, but has an additional status return value.
type Decoder func(p []byte) (c rune, size int, status Status)

// An Encoder is a function that encodes a character set, one character at a time.
// It works much like utf8.EncodeRune, but has an additional status return value.
type Encoder func(p []byte, c rune) (size int, status Status)

// A Charset represents a character set that can be converted, and contains functions
// to create Converters to encode and decode strings in that character set.
type Charset struct {
	// Name is the character set's canonical name.
	Name string

	// Aliases is a list of alternate names.
	Aliases []string

	// NewDecoder returns a Decoder to convert from the charset to Unicode.
	NewDecoder func() Decoder

	// NewEncoder returns an Encoder to convert from Unicode to the charset.
	NewEncoder func() Encoder
}

// The charsets are stored in charsets under their canonical names.
var charsets = make(map[string]*Charset)

// aliases maps simplified names (both canonical names and aliases, as
// produced by simplifyName) to canonical names.
var aliases = make(map[string]string)

// simplifyName converts a name to lower case and removes non-alphanumeric characters.
// This is how the names are used as keys to the aliases map.
func simplifyName(name string) string {
	var buf bytes.Buffer
	for _, c := range name {
		switch {
		case unicode.IsDigit(c):
			buf.WriteRune(c)
		case unicode.IsLetter(c):
			buf.WriteRune(unicode.ToLower(c))
		}
	}

	return buf.String()
}

// RegisterCharset adds a charset to the registry: it is stored in charsets
// under its canonical name, and the simplified forms of its name and of each
// alias are recorded in aliases. Registering a name or alias again overwrites
// the earlier entry.
func RegisterCharset(cs *Charset) {
	name := cs.Name
	charsets[name] = cs
	aliases[simplifyName(name)] = name
	for _, alias := range cs.Aliases {
		aliases[simplifyName(alias)] = name
	}
}

// GetCharset fetches a charset by name.
// If the name is not found, it returns nil.
func GetCharset(name string) *Charset {
	return charsets[aliases[simplifyName(name)]]
}

// NewDecoder returns a Decoder to decode the named charset.
// If the name is not found, or the charset was registered without a
// NewDecoder factory, it returns nil.
func NewDecoder(name string) Decoder {
	cs := GetCharset(name)
	// A Charset may legitimately provide only one direction; calling a nil
	// factory would panic, so treat it like an unknown name.
	if cs == nil || cs.NewDecoder == nil {
		return nil
	}
	return cs.NewDecoder()
}

// NewEncoder returns an Encoder to encode the named charset.
106 | func NewEncoder(name string) Encoder { 107 | cs := GetCharset(name) 108 | if cs == nil { 109 | return nil 110 | } 111 | return cs.NewEncoder() 112 | } 113 | -------------------------------------------------------------------------------- /convert_string.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // ConvertString converts a string from UTF-8 to e's encoding. 4 | func (e Encoder) ConvertString(s string) string { 5 | dest := make([]byte, len(s)+10) 6 | destPos := 0 7 | 8 | for _, rune := range s { 9 | retry: 10 | size, status := e(dest[destPos:], rune) 11 | 12 | if status == NO_ROOM { 13 | newDest := make([]byte, len(dest)*2) 14 | copy(newDest, dest) 15 | dest = newDest 16 | goto retry 17 | } 18 | 19 | if status == STATE_ONLY { 20 | destPos += size 21 | goto retry 22 | } 23 | 24 | destPos += size 25 | } 26 | 27 | return string(dest[:destPos]) 28 | } 29 | 30 | // ConvertString converts a string from d's encoding to UTF-8. 31 | func (d Decoder) ConvertString(s string) string { 32 | bytes := []byte(s) 33 | runes := make([]rune, len(s)) 34 | destPos := 0 35 | 36 | for len(bytes) > 0 { 37 | c, size, status := d(bytes) 38 | 39 | if status == STATE_ONLY { 40 | bytes = bytes[size:] 41 | continue 42 | } 43 | 44 | if status == NO_ROOM { 45 | c = 0xfffd 46 | size = len(bytes) 47 | status = INVALID_CHAR 48 | } 49 | 50 | bytes = bytes[size:] 51 | runes[destPos] = c 52 | destPos++ 53 | } 54 | 55 | return string(runes[:destPos]) 56 | } 57 | -------------------------------------------------------------------------------- /entity.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // decoding HTML entities 4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | var entityOnce sync.Once 10 | 11 | // entityTrie is similar to mbcsTrie, but not identical. 12 | type htmlEntityTrie struct { 13 | runes [2]rune // Some HTML entities decode to two characters. 
14 | children []htmlEntityTrie 15 | } 16 | 17 | var entityTrie htmlEntityTrie 18 | 19 | func buildEntityTrie() { 20 | for e, c := range entity { 21 | current := &entityTrie 22 | for i := 0; i < len(e); i++ { 23 | if current.children == nil { 24 | current.children = make([]htmlEntityTrie, 256) 25 | } 26 | current = ¤t.children[e[i]] 27 | } 28 | current.runes[0] = c 29 | } 30 | 31 | for e, runes := range entity2 { 32 | current := &entityTrie 33 | for i := 0; i < len(e); i++ { 34 | if current.children == nil { 35 | current.children = make([]htmlEntityTrie, 256) 36 | } 37 | current = ¤t.children[e[i]] 38 | } 39 | current.runes = runes 40 | } 41 | } 42 | 43 | // EntityDecoder returns a Decoder that decodes HTML character entities. 44 | // If there is no valid character entity at the current position, it returns INVALID_CHAR. 45 | // So it needs to be combined with another Decoder via FallbackDecoder. 46 | func EntityDecoder() Decoder { 47 | entityOnce.Do(buildEntityTrie) 48 | var leftover rune // leftover rune from two-rune entity 49 | return func(p []byte) (r rune, size int, status Status) { 50 | if leftover != 0 { 51 | r = leftover 52 | leftover = 0 53 | return r, 0, SUCCESS 54 | } 55 | 56 | if len(p) == 0 { 57 | return 0, 0, NO_ROOM 58 | } 59 | 60 | if p[0] != '&' { 61 | return 0xfffd, 1, INVALID_CHAR 62 | } 63 | 64 | if len(p) < 3 { 65 | return 0, 1, NO_ROOM 66 | } 67 | 68 | r, size, status = 0xfffd, 1, INVALID_CHAR 69 | n := 1 // number of bytes read so far 70 | 71 | if p[n] == '#' { 72 | n++ 73 | c := p[n] 74 | hex := false 75 | if c == 'x' || c == 'X' { 76 | hex = true 77 | n++ 78 | } 79 | 80 | var x rune 81 | for n < len(p) { 82 | c = p[n] 83 | n++ 84 | if hex { 85 | if '0' <= c && c <= '9' { 86 | x = 16*x + rune(c) - '0' 87 | continue 88 | } else if 'a' <= c && c <= 'f' { 89 | x = 16*x + rune(c) - 'a' + 10 90 | continue 91 | } else if 'A' <= c && c <= 'F' { 92 | x = 16*x + rune(c) - 'A' + 10 93 | continue 94 | } 95 | } else if '0' <= c && c <= '9' { 96 | x = 
10*x + rune(c) - '0' 97 | continue 98 | } 99 | if c != ';' { 100 | n-- 101 | } 102 | break 103 | } 104 | 105 | if n == len(p) && p[n-1] != ';' { 106 | return 0, 0, NO_ROOM 107 | } 108 | 109 | size = n 110 | if p[n-1] == ';' { 111 | n-- 112 | } 113 | if hex { 114 | n-- 115 | } 116 | n-- 117 | // Now n is the number of actual digits read. 118 | if n == 0 { 119 | return 0xfffd, 1, INVALID_CHAR 120 | } 121 | 122 | if 0x80 <= x && x <= 0x9F { 123 | // Replace characters from Windows-1252 with UTF-8 equivalents. 124 | x = replacementTable[x-0x80] 125 | } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { 126 | // Replace invalid characters with the replacement character. 127 | return 0xfffd, size, INVALID_CHAR 128 | } 129 | 130 | r = x 131 | status = SUCCESS 132 | return 133 | } 134 | 135 | current := &entityTrie 136 | for current.children != nil { 137 | if len(p) <= n { 138 | leftover = 0 139 | return 0, 0, NO_ROOM 140 | } 141 | 142 | current = ¤t.children[p[n]] 143 | n++ 144 | if current.runes[0] != 0 { 145 | r, leftover = current.runes[0], current.runes[1] 146 | size = n 147 | status = SUCCESS 148 | // but don't return yet, since we need the longest match 149 | } 150 | } 151 | 152 | return 153 | } 154 | } 155 | 156 | // This table is copied from /src/pkg/html/escape.go in the Go source 157 | // 158 | // These replacements permit compatibility with old numeric entities that 159 | // assumed Windows-1252 encoding. 160 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference 161 | var replacementTable = [...]rune{ 162 | '\u20AC', // First entry is what 0x80 should be replaced with. 
163 | '\u0081', 164 | '\u201A', 165 | '\u0192', 166 | '\u201E', 167 | '\u2026', 168 | '\u2020', 169 | '\u2021', 170 | '\u02C6', 171 | '\u2030', 172 | '\u0160', 173 | '\u2039', 174 | '\u0152', 175 | '\u008D', 176 | '\u017D', 177 | '\u008F', 178 | '\u0090', 179 | '\u2018', 180 | '\u2019', 181 | '\u201C', 182 | '\u201D', 183 | '\u2022', 184 | '\u2013', 185 | '\u2014', 186 | '\u02DC', 187 | '\u2122', 188 | '\u0161', 189 | '\u203A', 190 | '\u0153', 191 | '\u009D', 192 | '\u017E', 193 | '\u0178', // Last entry is 0x9F. 194 | // 0x00->'\uFFFD' is handled programmatically. 195 | // 0x0D->'\u000D' is a no-op. 196 | } 197 | -------------------------------------------------------------------------------- /entity_data.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Taken from /src/pkg/html/entity.go in the Go source code. 4 | 5 | // Copyright 2010 The Go Authors. All rights reserved. 6 | // Use of this source code is governed by a BSD-style 7 | // license that can be found in the LICENSE file. 8 | 9 | //package html 10 | 11 | // entity is a map from HTML entity names to their values. The semicolon matters: 12 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html 13 | // lists both "amp" and "amp;" as two separate entries. 
14 | // 15 | // Note that the HTML5 list is larger than the HTML4 list at 16 | // http://www.w3.org/TR/html4/sgml/entities.html 17 | var entity = map[string]rune{ 18 | "AElig;": '\U000000C6', 19 | "AMP;": '\U00000026', 20 | "Aacute;": '\U000000C1', 21 | "Abreve;": '\U00000102', 22 | "Acirc;": '\U000000C2', 23 | "Acy;": '\U00000410', 24 | "Afr;": '\U0001D504', 25 | "Agrave;": '\U000000C0', 26 | "Alpha;": '\U00000391', 27 | "Amacr;": '\U00000100', 28 | "And;": '\U00002A53', 29 | "Aogon;": '\U00000104', 30 | "Aopf;": '\U0001D538', 31 | "ApplyFunction;": '\U00002061', 32 | "Aring;": '\U000000C5', 33 | "Ascr;": '\U0001D49C', 34 | "Assign;": '\U00002254', 35 | "Atilde;": '\U000000C3', 36 | "Auml;": '\U000000C4', 37 | "Backslash;": '\U00002216', 38 | "Barv;": '\U00002AE7', 39 | "Barwed;": '\U00002306', 40 | "Bcy;": '\U00000411', 41 | "Because;": '\U00002235', 42 | "Bernoullis;": '\U0000212C', 43 | "Beta;": '\U00000392', 44 | "Bfr;": '\U0001D505', 45 | "Bopf;": '\U0001D539', 46 | "Breve;": '\U000002D8', 47 | "Bscr;": '\U0000212C', 48 | "Bumpeq;": '\U0000224E', 49 | "CHcy;": '\U00000427', 50 | "COPY;": '\U000000A9', 51 | "Cacute;": '\U00000106', 52 | "Cap;": '\U000022D2', 53 | "CapitalDifferentialD;": '\U00002145', 54 | "Cayleys;": '\U0000212D', 55 | "Ccaron;": '\U0000010C', 56 | "Ccedil;": '\U000000C7', 57 | "Ccirc;": '\U00000108', 58 | "Cconint;": '\U00002230', 59 | "Cdot;": '\U0000010A', 60 | "Cedilla;": '\U000000B8', 61 | "CenterDot;": '\U000000B7', 62 | "Cfr;": '\U0000212D', 63 | "Chi;": '\U000003A7', 64 | "CircleDot;": '\U00002299', 65 | "CircleMinus;": '\U00002296', 66 | "CirclePlus;": '\U00002295', 67 | "CircleTimes;": '\U00002297', 68 | "ClockwiseContourIntegral;": '\U00002232', 69 | "CloseCurlyDoubleQuote;": '\U0000201D', 70 | "CloseCurlyQuote;": '\U00002019', 71 | "Colon;": '\U00002237', 72 | "Colone;": '\U00002A74', 73 | "Congruent;": '\U00002261', 74 | "Conint;": '\U0000222F', 75 | "ContourIntegral;": '\U0000222E', 76 | "Copf;": '\U00002102', 77 | "Coproduct;": 
'\U00002210', 78 | "CounterClockwiseContourIntegral;": '\U00002233', 79 | "Cross;": '\U00002A2F', 80 | "Cscr;": '\U0001D49E', 81 | "Cup;": '\U000022D3', 82 | "CupCap;": '\U0000224D', 83 | "DD;": '\U00002145', 84 | "DDotrahd;": '\U00002911', 85 | "DJcy;": '\U00000402', 86 | "DScy;": '\U00000405', 87 | "DZcy;": '\U0000040F', 88 | "Dagger;": '\U00002021', 89 | "Darr;": '\U000021A1', 90 | "Dashv;": '\U00002AE4', 91 | "Dcaron;": '\U0000010E', 92 | "Dcy;": '\U00000414', 93 | "Del;": '\U00002207', 94 | "Delta;": '\U00000394', 95 | "Dfr;": '\U0001D507', 96 | "DiacriticalAcute;": '\U000000B4', 97 | "DiacriticalDot;": '\U000002D9', 98 | "DiacriticalDoubleAcute;": '\U000002DD', 99 | "DiacriticalGrave;": '\U00000060', 100 | "DiacriticalTilde;": '\U000002DC', 101 | "Diamond;": '\U000022C4', 102 | "DifferentialD;": '\U00002146', 103 | "Dopf;": '\U0001D53B', 104 | "Dot;": '\U000000A8', 105 | "DotDot;": '\U000020DC', 106 | "DotEqual;": '\U00002250', 107 | "DoubleContourIntegral;": '\U0000222F', 108 | "DoubleDot;": '\U000000A8', 109 | "DoubleDownArrow;": '\U000021D3', 110 | "DoubleLeftArrow;": '\U000021D0', 111 | "DoubleLeftRightArrow;": '\U000021D4', 112 | "DoubleLeftTee;": '\U00002AE4', 113 | "DoubleLongLeftArrow;": '\U000027F8', 114 | "DoubleLongLeftRightArrow;": '\U000027FA', 115 | "DoubleLongRightArrow;": '\U000027F9', 116 | "DoubleRightArrow;": '\U000021D2', 117 | "DoubleRightTee;": '\U000022A8', 118 | "DoubleUpArrow;": '\U000021D1', 119 | "DoubleUpDownArrow;": '\U000021D5', 120 | "DoubleVerticalBar;": '\U00002225', 121 | "DownArrow;": '\U00002193', 122 | "DownArrowBar;": '\U00002913', 123 | "DownArrowUpArrow;": '\U000021F5', 124 | "DownBreve;": '\U00000311', 125 | "DownLeftRightVector;": '\U00002950', 126 | "DownLeftTeeVector;": '\U0000295E', 127 | "DownLeftVector;": '\U000021BD', 128 | "DownLeftVectorBar;": '\U00002956', 129 | "DownRightTeeVector;": '\U0000295F', 130 | "DownRightVector;": '\U000021C1', 131 | "DownRightVectorBar;": '\U00002957', 132 | "DownTee;": 
'\U000022A4', 133 | "DownTeeArrow;": '\U000021A7', 134 | "Downarrow;": '\U000021D3', 135 | "Dscr;": '\U0001D49F', 136 | "Dstrok;": '\U00000110', 137 | "ENG;": '\U0000014A', 138 | "ETH;": '\U000000D0', 139 | "Eacute;": '\U000000C9', 140 | "Ecaron;": '\U0000011A', 141 | "Ecirc;": '\U000000CA', 142 | "Ecy;": '\U0000042D', 143 | "Edot;": '\U00000116', 144 | "Efr;": '\U0001D508', 145 | "Egrave;": '\U000000C8', 146 | "Element;": '\U00002208', 147 | "Emacr;": '\U00000112', 148 | "EmptySmallSquare;": '\U000025FB', 149 | "EmptyVerySmallSquare;": '\U000025AB', 150 | "Eogon;": '\U00000118', 151 | "Eopf;": '\U0001D53C', 152 | "Epsilon;": '\U00000395', 153 | "Equal;": '\U00002A75', 154 | "EqualTilde;": '\U00002242', 155 | "Equilibrium;": '\U000021CC', 156 | "Escr;": '\U00002130', 157 | "Esim;": '\U00002A73', 158 | "Eta;": '\U00000397', 159 | "Euml;": '\U000000CB', 160 | "Exists;": '\U00002203', 161 | "ExponentialE;": '\U00002147', 162 | "Fcy;": '\U00000424', 163 | "Ffr;": '\U0001D509', 164 | "FilledSmallSquare;": '\U000025FC', 165 | "FilledVerySmallSquare;": '\U000025AA', 166 | "Fopf;": '\U0001D53D', 167 | "ForAll;": '\U00002200', 168 | "Fouriertrf;": '\U00002131', 169 | "Fscr;": '\U00002131', 170 | "GJcy;": '\U00000403', 171 | "GT;": '\U0000003E', 172 | "Gamma;": '\U00000393', 173 | "Gammad;": '\U000003DC', 174 | "Gbreve;": '\U0000011E', 175 | "Gcedil;": '\U00000122', 176 | "Gcirc;": '\U0000011C', 177 | "Gcy;": '\U00000413', 178 | "Gdot;": '\U00000120', 179 | "Gfr;": '\U0001D50A', 180 | "Gg;": '\U000022D9', 181 | "Gopf;": '\U0001D53E', 182 | "GreaterEqual;": '\U00002265', 183 | "GreaterEqualLess;": '\U000022DB', 184 | "GreaterFullEqual;": '\U00002267', 185 | "GreaterGreater;": '\U00002AA2', 186 | "GreaterLess;": '\U00002277', 187 | "GreaterSlantEqual;": '\U00002A7E', 188 | "GreaterTilde;": '\U00002273', 189 | "Gscr;": '\U0001D4A2', 190 | "Gt;": '\U0000226B', 191 | "HARDcy;": '\U0000042A', 192 | "Hacek;": '\U000002C7', 193 | "Hat;": '\U0000005E', 194 | "Hcirc;": '\U00000124', 
195 | "Hfr;": '\U0000210C', 196 | "HilbertSpace;": '\U0000210B', 197 | "Hopf;": '\U0000210D', 198 | "HorizontalLine;": '\U00002500', 199 | "Hscr;": '\U0000210B', 200 | "Hstrok;": '\U00000126', 201 | "HumpDownHump;": '\U0000224E', 202 | "HumpEqual;": '\U0000224F', 203 | "IEcy;": '\U00000415', 204 | "IJlig;": '\U00000132', 205 | "IOcy;": '\U00000401', 206 | "Iacute;": '\U000000CD', 207 | "Icirc;": '\U000000CE', 208 | "Icy;": '\U00000418', 209 | "Idot;": '\U00000130', 210 | "Ifr;": '\U00002111', 211 | "Igrave;": '\U000000CC', 212 | "Im;": '\U00002111', 213 | "Imacr;": '\U0000012A', 214 | "ImaginaryI;": '\U00002148', 215 | "Implies;": '\U000021D2', 216 | "Int;": '\U0000222C', 217 | "Integral;": '\U0000222B', 218 | "Intersection;": '\U000022C2', 219 | "InvisibleComma;": '\U00002063', 220 | "InvisibleTimes;": '\U00002062', 221 | "Iogon;": '\U0000012E', 222 | "Iopf;": '\U0001D540', 223 | "Iota;": '\U00000399', 224 | "Iscr;": '\U00002110', 225 | "Itilde;": '\U00000128', 226 | "Iukcy;": '\U00000406', 227 | "Iuml;": '\U000000CF', 228 | "Jcirc;": '\U00000134', 229 | "Jcy;": '\U00000419', 230 | "Jfr;": '\U0001D50D', 231 | "Jopf;": '\U0001D541', 232 | "Jscr;": '\U0001D4A5', 233 | "Jsercy;": '\U00000408', 234 | "Jukcy;": '\U00000404', 235 | "KHcy;": '\U00000425', 236 | "KJcy;": '\U0000040C', 237 | "Kappa;": '\U0000039A', 238 | "Kcedil;": '\U00000136', 239 | "Kcy;": '\U0000041A', 240 | "Kfr;": '\U0001D50E', 241 | "Kopf;": '\U0001D542', 242 | "Kscr;": '\U0001D4A6', 243 | "LJcy;": '\U00000409', 244 | "LT;": '\U0000003C', 245 | "Lacute;": '\U00000139', 246 | "Lambda;": '\U0000039B', 247 | "Lang;": '\U000027EA', 248 | "Laplacetrf;": '\U00002112', 249 | "Larr;": '\U0000219E', 250 | "Lcaron;": '\U0000013D', 251 | "Lcedil;": '\U0000013B', 252 | "Lcy;": '\U0000041B', 253 | "LeftAngleBracket;": '\U000027E8', 254 | "LeftArrow;": '\U00002190', 255 | "LeftArrowBar;": '\U000021E4', 256 | "LeftArrowRightArrow;": '\U000021C6', 257 | "LeftCeiling;": '\U00002308', 258 | "LeftDoubleBracket;": 
'\U000027E6', 259 | "LeftDownTeeVector;": '\U00002961', 260 | "LeftDownVector;": '\U000021C3', 261 | "LeftDownVectorBar;": '\U00002959', 262 | "LeftFloor;": '\U0000230A', 263 | "LeftRightArrow;": '\U00002194', 264 | "LeftRightVector;": '\U0000294E', 265 | "LeftTee;": '\U000022A3', 266 | "LeftTeeArrow;": '\U000021A4', 267 | "LeftTeeVector;": '\U0000295A', 268 | "LeftTriangle;": '\U000022B2', 269 | "LeftTriangleBar;": '\U000029CF', 270 | "LeftTriangleEqual;": '\U000022B4', 271 | "LeftUpDownVector;": '\U00002951', 272 | "LeftUpTeeVector;": '\U00002960', 273 | "LeftUpVector;": '\U000021BF', 274 | "LeftUpVectorBar;": '\U00002958', 275 | "LeftVector;": '\U000021BC', 276 | "LeftVectorBar;": '\U00002952', 277 | "Leftarrow;": '\U000021D0', 278 | "Leftrightarrow;": '\U000021D4', 279 | "LessEqualGreater;": '\U000022DA', 280 | "LessFullEqual;": '\U00002266', 281 | "LessGreater;": '\U00002276', 282 | "LessLess;": '\U00002AA1', 283 | "LessSlantEqual;": '\U00002A7D', 284 | "LessTilde;": '\U00002272', 285 | "Lfr;": '\U0001D50F', 286 | "Ll;": '\U000022D8', 287 | "Lleftarrow;": '\U000021DA', 288 | "Lmidot;": '\U0000013F', 289 | "LongLeftArrow;": '\U000027F5', 290 | "LongLeftRightArrow;": '\U000027F7', 291 | "LongRightArrow;": '\U000027F6', 292 | "Longleftarrow;": '\U000027F8', 293 | "Longleftrightarrow;": '\U000027FA', 294 | "Longrightarrow;": '\U000027F9', 295 | "Lopf;": '\U0001D543', 296 | "LowerLeftArrow;": '\U00002199', 297 | "LowerRightArrow;": '\U00002198', 298 | "Lscr;": '\U00002112', 299 | "Lsh;": '\U000021B0', 300 | "Lstrok;": '\U00000141', 301 | "Lt;": '\U0000226A', 302 | "Map;": '\U00002905', 303 | "Mcy;": '\U0000041C', 304 | "MediumSpace;": '\U0000205F', 305 | "Mellintrf;": '\U00002133', 306 | "Mfr;": '\U0001D510', 307 | "MinusPlus;": '\U00002213', 308 | "Mopf;": '\U0001D544', 309 | "Mscr;": '\U00002133', 310 | "Mu;": '\U0000039C', 311 | "NJcy;": '\U0000040A', 312 | "Nacute;": '\U00000143', 313 | "Ncaron;": '\U00000147', 314 | "Ncedil;": '\U00000145', 315 | "Ncy;": 
'\U0000041D', 316 | "NegativeMediumSpace;": '\U0000200B', 317 | "NegativeThickSpace;": '\U0000200B', 318 | "NegativeThinSpace;": '\U0000200B', 319 | "NegativeVeryThinSpace;": '\U0000200B', 320 | "NestedGreaterGreater;": '\U0000226B', 321 | "NestedLessLess;": '\U0000226A', 322 | "NewLine;": '\U0000000A', 323 | "Nfr;": '\U0001D511', 324 | "NoBreak;": '\U00002060', 325 | "NonBreakingSpace;": '\U000000A0', 326 | "Nopf;": '\U00002115', 327 | "Not;": '\U00002AEC', 328 | "NotCongruent;": '\U00002262', 329 | "NotCupCap;": '\U0000226D', 330 | "NotDoubleVerticalBar;": '\U00002226', 331 | "NotElement;": '\U00002209', 332 | "NotEqual;": '\U00002260', 333 | "NotExists;": '\U00002204', 334 | "NotGreater;": '\U0000226F', 335 | "NotGreaterEqual;": '\U00002271', 336 | "NotGreaterLess;": '\U00002279', 337 | "NotGreaterTilde;": '\U00002275', 338 | "NotLeftTriangle;": '\U000022EA', 339 | "NotLeftTriangleEqual;": '\U000022EC', 340 | "NotLess;": '\U0000226E', 341 | "NotLessEqual;": '\U00002270', 342 | "NotLessGreater;": '\U00002278', 343 | "NotLessTilde;": '\U00002274', 344 | "NotPrecedes;": '\U00002280', 345 | "NotPrecedesSlantEqual;": '\U000022E0', 346 | "NotReverseElement;": '\U0000220C', 347 | "NotRightTriangle;": '\U000022EB', 348 | "NotRightTriangleEqual;": '\U000022ED', 349 | "NotSquareSubsetEqual;": '\U000022E2', 350 | "NotSquareSupersetEqual;": '\U000022E3', 351 | "NotSubsetEqual;": '\U00002288', 352 | "NotSucceeds;": '\U00002281', 353 | "NotSucceedsSlantEqual;": '\U000022E1', 354 | "NotSupersetEqual;": '\U00002289', 355 | "NotTilde;": '\U00002241', 356 | "NotTildeEqual;": '\U00002244', 357 | "NotTildeFullEqual;": '\U00002247', 358 | "NotTildeTilde;": '\U00002249', 359 | "NotVerticalBar;": '\U00002224', 360 | "Nscr;": '\U0001D4A9', 361 | "Ntilde;": '\U000000D1', 362 | "Nu;": '\U0000039D', 363 | "OElig;": '\U00000152', 364 | "Oacute;": '\U000000D3', 365 | "Ocirc;": '\U000000D4', 366 | "Ocy;": '\U0000041E', 367 | "Odblac;": '\U00000150', 368 | "Ofr;": '\U0001D512', 369 | 
"Ograve;": '\U000000D2', 370 | "Omacr;": '\U0000014C', 371 | "Omega;": '\U000003A9', 372 | "Omicron;": '\U0000039F', 373 | "Oopf;": '\U0001D546', 374 | "OpenCurlyDoubleQuote;": '\U0000201C', 375 | "OpenCurlyQuote;": '\U00002018', 376 | "Or;": '\U00002A54', 377 | "Oscr;": '\U0001D4AA', 378 | "Oslash;": '\U000000D8', 379 | "Otilde;": '\U000000D5', 380 | "Otimes;": '\U00002A37', 381 | "Ouml;": '\U000000D6', 382 | "OverBar;": '\U0000203E', 383 | "OverBrace;": '\U000023DE', 384 | "OverBracket;": '\U000023B4', 385 | "OverParenthesis;": '\U000023DC', 386 | "PartialD;": '\U00002202', 387 | "Pcy;": '\U0000041F', 388 | "Pfr;": '\U0001D513', 389 | "Phi;": '\U000003A6', 390 | "Pi;": '\U000003A0', 391 | "PlusMinus;": '\U000000B1', 392 | "Poincareplane;": '\U0000210C', 393 | "Popf;": '\U00002119', 394 | "Pr;": '\U00002ABB', 395 | "Precedes;": '\U0000227A', 396 | "PrecedesEqual;": '\U00002AAF', 397 | "PrecedesSlantEqual;": '\U0000227C', 398 | "PrecedesTilde;": '\U0000227E', 399 | "Prime;": '\U00002033', 400 | "Product;": '\U0000220F', 401 | "Proportion;": '\U00002237', 402 | "Proportional;": '\U0000221D', 403 | "Pscr;": '\U0001D4AB', 404 | "Psi;": '\U000003A8', 405 | "QUOT;": '\U00000022', 406 | "Qfr;": '\U0001D514', 407 | "Qopf;": '\U0000211A', 408 | "Qscr;": '\U0001D4AC', 409 | "RBarr;": '\U00002910', 410 | "REG;": '\U000000AE', 411 | "Racute;": '\U00000154', 412 | "Rang;": '\U000027EB', 413 | "Rarr;": '\U000021A0', 414 | "Rarrtl;": '\U00002916', 415 | "Rcaron;": '\U00000158', 416 | "Rcedil;": '\U00000156', 417 | "Rcy;": '\U00000420', 418 | "Re;": '\U0000211C', 419 | "ReverseElement;": '\U0000220B', 420 | "ReverseEquilibrium;": '\U000021CB', 421 | "ReverseUpEquilibrium;": '\U0000296F', 422 | "Rfr;": '\U0000211C', 423 | "Rho;": '\U000003A1', 424 | "RightAngleBracket;": '\U000027E9', 425 | "RightArrow;": '\U00002192', 426 | "RightArrowBar;": '\U000021E5', 427 | "RightArrowLeftArrow;": '\U000021C4', 428 | "RightCeiling;": '\U00002309', 429 | "RightDoubleBracket;": '\U000027E7', 
430 | "RightDownTeeVector;": '\U0000295D', 431 | "RightDownVector;": '\U000021C2', 432 | "RightDownVectorBar;": '\U00002955', 433 | "RightFloor;": '\U0000230B', 434 | "RightTee;": '\U000022A2', 435 | "RightTeeArrow;": '\U000021A6', 436 | "RightTeeVector;": '\U0000295B', 437 | "RightTriangle;": '\U000022B3', 438 | "RightTriangleBar;": '\U000029D0', 439 | "RightTriangleEqual;": '\U000022B5', 440 | "RightUpDownVector;": '\U0000294F', 441 | "RightUpTeeVector;": '\U0000295C', 442 | "RightUpVector;": '\U000021BE', 443 | "RightUpVectorBar;": '\U00002954', 444 | "RightVector;": '\U000021C0', 445 | "RightVectorBar;": '\U00002953', 446 | "Rightarrow;": '\U000021D2', 447 | "Ropf;": '\U0000211D', 448 | "RoundImplies;": '\U00002970', 449 | "Rrightarrow;": '\U000021DB', 450 | "Rscr;": '\U0000211B', 451 | "Rsh;": '\U000021B1', 452 | "RuleDelayed;": '\U000029F4', 453 | "SHCHcy;": '\U00000429', 454 | "SHcy;": '\U00000428', 455 | "SOFTcy;": '\U0000042C', 456 | "Sacute;": '\U0000015A', 457 | "Sc;": '\U00002ABC', 458 | "Scaron;": '\U00000160', 459 | "Scedil;": '\U0000015E', 460 | "Scirc;": '\U0000015C', 461 | "Scy;": '\U00000421', 462 | "Sfr;": '\U0001D516', 463 | "ShortDownArrow;": '\U00002193', 464 | "ShortLeftArrow;": '\U00002190', 465 | "ShortRightArrow;": '\U00002192', 466 | "ShortUpArrow;": '\U00002191', 467 | "Sigma;": '\U000003A3', 468 | "SmallCircle;": '\U00002218', 469 | "Sopf;": '\U0001D54A', 470 | "Sqrt;": '\U0000221A', 471 | "Square;": '\U000025A1', 472 | "SquareIntersection;": '\U00002293', 473 | "SquareSubset;": '\U0000228F', 474 | "SquareSubsetEqual;": '\U00002291', 475 | "SquareSuperset;": '\U00002290', 476 | "SquareSupersetEqual;": '\U00002292', 477 | "SquareUnion;": '\U00002294', 478 | "Sscr;": '\U0001D4AE', 479 | "Star;": '\U000022C6', 480 | "Sub;": '\U000022D0', 481 | "Subset;": '\U000022D0', 482 | "SubsetEqual;": '\U00002286', 483 | "Succeeds;": '\U0000227B', 484 | "SucceedsEqual;": '\U00002AB0', 485 | "SucceedsSlantEqual;": '\U0000227D', 486 | "SucceedsTilde;": 
'\U0000227F', 487 | "SuchThat;": '\U0000220B', 488 | "Sum;": '\U00002211', 489 | "Sup;": '\U000022D1', 490 | "Superset;": '\U00002283', 491 | "SupersetEqual;": '\U00002287', 492 | "Supset;": '\U000022D1', 493 | "THORN;": '\U000000DE', 494 | "TRADE;": '\U00002122', 495 | "TSHcy;": '\U0000040B', 496 | "TScy;": '\U00000426', 497 | "Tab;": '\U00000009', 498 | "Tau;": '\U000003A4', 499 | "Tcaron;": '\U00000164', 500 | "Tcedil;": '\U00000162', 501 | "Tcy;": '\U00000422', 502 | "Tfr;": '\U0001D517', 503 | "Therefore;": '\U00002234', 504 | "Theta;": '\U00000398', 505 | "ThinSpace;": '\U00002009', 506 | "Tilde;": '\U0000223C', 507 | "TildeEqual;": '\U00002243', 508 | "TildeFullEqual;": '\U00002245', 509 | "TildeTilde;": '\U00002248', 510 | "Topf;": '\U0001D54B', 511 | "TripleDot;": '\U000020DB', 512 | "Tscr;": '\U0001D4AF', 513 | "Tstrok;": '\U00000166', 514 | "Uacute;": '\U000000DA', 515 | "Uarr;": '\U0000219F', 516 | "Uarrocir;": '\U00002949', 517 | "Ubrcy;": '\U0000040E', 518 | "Ubreve;": '\U0000016C', 519 | "Ucirc;": '\U000000DB', 520 | "Ucy;": '\U00000423', 521 | "Udblac;": '\U00000170', 522 | "Ufr;": '\U0001D518', 523 | "Ugrave;": '\U000000D9', 524 | "Umacr;": '\U0000016A', 525 | "UnderBar;": '\U0000005F', 526 | "UnderBrace;": '\U000023DF', 527 | "UnderBracket;": '\U000023B5', 528 | "UnderParenthesis;": '\U000023DD', 529 | "Union;": '\U000022C3', 530 | "UnionPlus;": '\U0000228E', 531 | "Uogon;": '\U00000172', 532 | "Uopf;": '\U0001D54C', 533 | "UpArrow;": '\U00002191', 534 | "UpArrowBar;": '\U00002912', 535 | "UpArrowDownArrow;": '\U000021C5', 536 | "UpDownArrow;": '\U00002195', 537 | "UpEquilibrium;": '\U0000296E', 538 | "UpTee;": '\U000022A5', 539 | "UpTeeArrow;": '\U000021A5', 540 | "Uparrow;": '\U000021D1', 541 | "Updownarrow;": '\U000021D5', 542 | "UpperLeftArrow;": '\U00002196', 543 | "UpperRightArrow;": '\U00002197', 544 | "Upsi;": '\U000003D2', 545 | "Upsilon;": '\U000003A5', 546 | "Uring;": '\U0000016E', 547 | "Uscr;": '\U0001D4B0', 548 | "Utilde;": 
'\U00000168', 549 | "Uuml;": '\U000000DC', 550 | "VDash;": '\U000022AB', 551 | "Vbar;": '\U00002AEB', 552 | "Vcy;": '\U00000412', 553 | "Vdash;": '\U000022A9', 554 | "Vdashl;": '\U00002AE6', 555 | "Vee;": '\U000022C1', 556 | "Verbar;": '\U00002016', 557 | "Vert;": '\U00002016', 558 | "VerticalBar;": '\U00002223', 559 | "VerticalLine;": '\U0000007C', 560 | "VerticalSeparator;": '\U00002758', 561 | "VerticalTilde;": '\U00002240', 562 | "VeryThinSpace;": '\U0000200A', 563 | "Vfr;": '\U0001D519', 564 | "Vopf;": '\U0001D54D', 565 | "Vscr;": '\U0001D4B1', 566 | "Vvdash;": '\U000022AA', 567 | "Wcirc;": '\U00000174', 568 | "Wedge;": '\U000022C0', 569 | "Wfr;": '\U0001D51A', 570 | "Wopf;": '\U0001D54E', 571 | "Wscr;": '\U0001D4B2', 572 | "Xfr;": '\U0001D51B', 573 | "Xi;": '\U0000039E', 574 | "Xopf;": '\U0001D54F', 575 | "Xscr;": '\U0001D4B3', 576 | "YAcy;": '\U0000042F', 577 | "YIcy;": '\U00000407', 578 | "YUcy;": '\U0000042E', 579 | "Yacute;": '\U000000DD', 580 | "Ycirc;": '\U00000176', 581 | "Ycy;": '\U0000042B', 582 | "Yfr;": '\U0001D51C', 583 | "Yopf;": '\U0001D550', 584 | "Yscr;": '\U0001D4B4', 585 | "Yuml;": '\U00000178', 586 | "ZHcy;": '\U00000416', 587 | "Zacute;": '\U00000179', 588 | "Zcaron;": '\U0000017D', 589 | "Zcy;": '\U00000417', 590 | "Zdot;": '\U0000017B', 591 | "ZeroWidthSpace;": '\U0000200B', 592 | "Zeta;": '\U00000396', 593 | "Zfr;": '\U00002128', 594 | "Zopf;": '\U00002124', 595 | "Zscr;": '\U0001D4B5', 596 | "aacute;": '\U000000E1', 597 | "abreve;": '\U00000103', 598 | "ac;": '\U0000223E', 599 | "acd;": '\U0000223F', 600 | "acirc;": '\U000000E2', 601 | "acute;": '\U000000B4', 602 | "acy;": '\U00000430', 603 | "aelig;": '\U000000E6', 604 | "af;": '\U00002061', 605 | "afr;": '\U0001D51E', 606 | "agrave;": '\U000000E0', 607 | "alefsym;": '\U00002135', 608 | "aleph;": '\U00002135', 609 | "alpha;": '\U000003B1', 610 | "amacr;": '\U00000101', 611 | "amalg;": '\U00002A3F', 612 | "amp;": '\U00000026', 613 | "and;": '\U00002227', 614 | "andand;": '\U00002A55', 
615 | "andd;": '\U00002A5C', 616 | "andslope;": '\U00002A58', 617 | "andv;": '\U00002A5A', 618 | "ang;": '\U00002220', 619 | "ange;": '\U000029A4', 620 | "angle;": '\U00002220', 621 | "angmsd;": '\U00002221', 622 | "angmsdaa;": '\U000029A8', 623 | "angmsdab;": '\U000029A9', 624 | "angmsdac;": '\U000029AA', 625 | "angmsdad;": '\U000029AB', 626 | "angmsdae;": '\U000029AC', 627 | "angmsdaf;": '\U000029AD', 628 | "angmsdag;": '\U000029AE', 629 | "angmsdah;": '\U000029AF', 630 | "angrt;": '\U0000221F', 631 | "angrtvb;": '\U000022BE', 632 | "angrtvbd;": '\U0000299D', 633 | "angsph;": '\U00002222', 634 | "angst;": '\U000000C5', 635 | "angzarr;": '\U0000237C', 636 | "aogon;": '\U00000105', 637 | "aopf;": '\U0001D552', 638 | "ap;": '\U00002248', 639 | "apE;": '\U00002A70', 640 | "apacir;": '\U00002A6F', 641 | "ape;": '\U0000224A', 642 | "apid;": '\U0000224B', 643 | "apos;": '\U00000027', 644 | "approx;": '\U00002248', 645 | "approxeq;": '\U0000224A', 646 | "aring;": '\U000000E5', 647 | "ascr;": '\U0001D4B6', 648 | "ast;": '\U0000002A', 649 | "asymp;": '\U00002248', 650 | "asympeq;": '\U0000224D', 651 | "atilde;": '\U000000E3', 652 | "auml;": '\U000000E4', 653 | "awconint;": '\U00002233', 654 | "awint;": '\U00002A11', 655 | "bNot;": '\U00002AED', 656 | "backcong;": '\U0000224C', 657 | "backepsilon;": '\U000003F6', 658 | "backprime;": '\U00002035', 659 | "backsim;": '\U0000223D', 660 | "backsimeq;": '\U000022CD', 661 | "barvee;": '\U000022BD', 662 | "barwed;": '\U00002305', 663 | "barwedge;": '\U00002305', 664 | "bbrk;": '\U000023B5', 665 | "bbrktbrk;": '\U000023B6', 666 | "bcong;": '\U0000224C', 667 | "bcy;": '\U00000431', 668 | "bdquo;": '\U0000201E', 669 | "becaus;": '\U00002235', 670 | "because;": '\U00002235', 671 | "bemptyv;": '\U000029B0', 672 | "bepsi;": '\U000003F6', 673 | "bernou;": '\U0000212C', 674 | "beta;": '\U000003B2', 675 | "beth;": '\U00002136', 676 | "between;": '\U0000226C', 677 | "bfr;": '\U0001D51F', 678 | "bigcap;": '\U000022C2', 679 | "bigcirc;": 
'\U000025EF', 680 | "bigcup;": '\U000022C3', 681 | "bigodot;": '\U00002A00', 682 | "bigoplus;": '\U00002A01', 683 | "bigotimes;": '\U00002A02', 684 | "bigsqcup;": '\U00002A06', 685 | "bigstar;": '\U00002605', 686 | "bigtriangledown;": '\U000025BD', 687 | "bigtriangleup;": '\U000025B3', 688 | "biguplus;": '\U00002A04', 689 | "bigvee;": '\U000022C1', 690 | "bigwedge;": '\U000022C0', 691 | "bkarow;": '\U0000290D', 692 | "blacklozenge;": '\U000029EB', 693 | "blacksquare;": '\U000025AA', 694 | "blacktriangle;": '\U000025B4', 695 | "blacktriangledown;": '\U000025BE', 696 | "blacktriangleleft;": '\U000025C2', 697 | "blacktriangleright;": '\U000025B8', 698 | "blank;": '\U00002423', 699 | "blk12;": '\U00002592', 700 | "blk14;": '\U00002591', 701 | "blk34;": '\U00002593', 702 | "block;": '\U00002588', 703 | "bnot;": '\U00002310', 704 | "bopf;": '\U0001D553', 705 | "bot;": '\U000022A5', 706 | "bottom;": '\U000022A5', 707 | "bowtie;": '\U000022C8', 708 | "boxDL;": '\U00002557', 709 | "boxDR;": '\U00002554', 710 | "boxDl;": '\U00002556', 711 | "boxDr;": '\U00002553', 712 | "boxH;": '\U00002550', 713 | "boxHD;": '\U00002566', 714 | "boxHU;": '\U00002569', 715 | "boxHd;": '\U00002564', 716 | "boxHu;": '\U00002567', 717 | "boxUL;": '\U0000255D', 718 | "boxUR;": '\U0000255A', 719 | "boxUl;": '\U0000255C', 720 | "boxUr;": '\U00002559', 721 | "boxV;": '\U00002551', 722 | "boxVH;": '\U0000256C', 723 | "boxVL;": '\U00002563', 724 | "boxVR;": '\U00002560', 725 | "boxVh;": '\U0000256B', 726 | "boxVl;": '\U00002562', 727 | "boxVr;": '\U0000255F', 728 | "boxbox;": '\U000029C9', 729 | "boxdL;": '\U00002555', 730 | "boxdR;": '\U00002552', 731 | "boxdl;": '\U00002510', 732 | "boxdr;": '\U0000250C', 733 | "boxh;": '\U00002500', 734 | "boxhD;": '\U00002565', 735 | "boxhU;": '\U00002568', 736 | "boxhd;": '\U0000252C', 737 | "boxhu;": '\U00002534', 738 | "boxminus;": '\U0000229F', 739 | "boxplus;": '\U0000229E', 740 | "boxtimes;": '\U000022A0', 741 | "boxuL;": '\U0000255B', 742 | "boxuR;": 
'\U00002558', 743 | "boxul;": '\U00002518', 744 | "boxur;": '\U00002514', 745 | "boxv;": '\U00002502', 746 | "boxvH;": '\U0000256A', 747 | "boxvL;": '\U00002561', 748 | "boxvR;": '\U0000255E', 749 | "boxvh;": '\U0000253C', 750 | "boxvl;": '\U00002524', 751 | "boxvr;": '\U0000251C', 752 | "bprime;": '\U00002035', 753 | "breve;": '\U000002D8', 754 | "brvbar;": '\U000000A6', 755 | "bscr;": '\U0001D4B7', 756 | "bsemi;": '\U0000204F', 757 | "bsim;": '\U0000223D', 758 | "bsime;": '\U000022CD', 759 | "bsol;": '\U0000005C', 760 | "bsolb;": '\U000029C5', 761 | "bsolhsub;": '\U000027C8', 762 | "bull;": '\U00002022', 763 | "bullet;": '\U00002022', 764 | "bump;": '\U0000224E', 765 | "bumpE;": '\U00002AAE', 766 | "bumpe;": '\U0000224F', 767 | "bumpeq;": '\U0000224F', 768 | "cacute;": '\U00000107', 769 | "cap;": '\U00002229', 770 | "capand;": '\U00002A44', 771 | "capbrcup;": '\U00002A49', 772 | "capcap;": '\U00002A4B', 773 | "capcup;": '\U00002A47', 774 | "capdot;": '\U00002A40', 775 | "caret;": '\U00002041', 776 | "caron;": '\U000002C7', 777 | "ccaps;": '\U00002A4D', 778 | "ccaron;": '\U0000010D', 779 | "ccedil;": '\U000000E7', 780 | "ccirc;": '\U00000109', 781 | "ccups;": '\U00002A4C', 782 | "ccupssm;": '\U00002A50', 783 | "cdot;": '\U0000010B', 784 | "cedil;": '\U000000B8', 785 | "cemptyv;": '\U000029B2', 786 | "cent;": '\U000000A2', 787 | "centerdot;": '\U000000B7', 788 | "cfr;": '\U0001D520', 789 | "chcy;": '\U00000447', 790 | "check;": '\U00002713', 791 | "checkmark;": '\U00002713', 792 | "chi;": '\U000003C7', 793 | "cir;": '\U000025CB', 794 | "cirE;": '\U000029C3', 795 | "circ;": '\U000002C6', 796 | "circeq;": '\U00002257', 797 | "circlearrowleft;": '\U000021BA', 798 | "circlearrowright;": '\U000021BB', 799 | "circledR;": '\U000000AE', 800 | "circledS;": '\U000024C8', 801 | "circledast;": '\U0000229B', 802 | "circledcirc;": '\U0000229A', 803 | "circleddash;": '\U0000229D', 804 | "cire;": '\U00002257', 805 | "cirfnint;": '\U00002A10', 806 | "cirmid;": '\U00002AEF', 807 | 
"cirscir;": '\U000029C2', 808 | "clubs;": '\U00002663', 809 | "clubsuit;": '\U00002663', 810 | "colon;": '\U0000003A', 811 | "colone;": '\U00002254', 812 | "coloneq;": '\U00002254', 813 | "comma;": '\U0000002C', 814 | "commat;": '\U00000040', 815 | "comp;": '\U00002201', 816 | "compfn;": '\U00002218', 817 | "complement;": '\U00002201', 818 | "complexes;": '\U00002102', 819 | "cong;": '\U00002245', 820 | "congdot;": '\U00002A6D', 821 | "conint;": '\U0000222E', 822 | "copf;": '\U0001D554', 823 | "coprod;": '\U00002210', 824 | "copy;": '\U000000A9', 825 | "copysr;": '\U00002117', 826 | "crarr;": '\U000021B5', 827 | "cross;": '\U00002717', 828 | "cscr;": '\U0001D4B8', 829 | "csub;": '\U00002ACF', 830 | "csube;": '\U00002AD1', 831 | "csup;": '\U00002AD0', 832 | "csupe;": '\U00002AD2', 833 | "ctdot;": '\U000022EF', 834 | "cudarrl;": '\U00002938', 835 | "cudarrr;": '\U00002935', 836 | "cuepr;": '\U000022DE', 837 | "cuesc;": '\U000022DF', 838 | "cularr;": '\U000021B6', 839 | "cularrp;": '\U0000293D', 840 | "cup;": '\U0000222A', 841 | "cupbrcap;": '\U00002A48', 842 | "cupcap;": '\U00002A46', 843 | "cupcup;": '\U00002A4A', 844 | "cupdot;": '\U0000228D', 845 | "cupor;": '\U00002A45', 846 | "curarr;": '\U000021B7', 847 | "curarrm;": '\U0000293C', 848 | "curlyeqprec;": '\U000022DE', 849 | "curlyeqsucc;": '\U000022DF', 850 | "curlyvee;": '\U000022CE', 851 | "curlywedge;": '\U000022CF', 852 | "curren;": '\U000000A4', 853 | "curvearrowleft;": '\U000021B6', 854 | "curvearrowright;": '\U000021B7', 855 | "cuvee;": '\U000022CE', 856 | "cuwed;": '\U000022CF', 857 | "cwconint;": '\U00002232', 858 | "cwint;": '\U00002231', 859 | "cylcty;": '\U0000232D', 860 | "dArr;": '\U000021D3', 861 | "dHar;": '\U00002965', 862 | "dagger;": '\U00002020', 863 | "daleth;": '\U00002138', 864 | "darr;": '\U00002193', 865 | "dash;": '\U00002010', 866 | "dashv;": '\U000022A3', 867 | "dbkarow;": '\U0000290F', 868 | "dblac;": '\U000002DD', 869 | "dcaron;": '\U0000010F', 870 | "dcy;": '\U00000434', 871 | 
"dd;": '\U00002146', 872 | "ddagger;": '\U00002021', 873 | "ddarr;": '\U000021CA', 874 | "ddotseq;": '\U00002A77', 875 | "deg;": '\U000000B0', 876 | "delta;": '\U000003B4', 877 | "demptyv;": '\U000029B1', 878 | "dfisht;": '\U0000297F', 879 | "dfr;": '\U0001D521', 880 | "dharl;": '\U000021C3', 881 | "dharr;": '\U000021C2', 882 | "diam;": '\U000022C4', 883 | "diamond;": '\U000022C4', 884 | "diamondsuit;": '\U00002666', 885 | "diams;": '\U00002666', 886 | "die;": '\U000000A8', 887 | "digamma;": '\U000003DD', 888 | "disin;": '\U000022F2', 889 | "div;": '\U000000F7', 890 | "divide;": '\U000000F7', 891 | "divideontimes;": '\U000022C7', 892 | "divonx;": '\U000022C7', 893 | "djcy;": '\U00000452', 894 | "dlcorn;": '\U0000231E', 895 | "dlcrop;": '\U0000230D', 896 | "dollar;": '\U00000024', 897 | "dopf;": '\U0001D555', 898 | "dot;": '\U000002D9', 899 | "doteq;": '\U00002250', 900 | "doteqdot;": '\U00002251', 901 | "dotminus;": '\U00002238', 902 | "dotplus;": '\U00002214', 903 | "dotsquare;": '\U000022A1', 904 | "doublebarwedge;": '\U00002306', 905 | "downarrow;": '\U00002193', 906 | "downdownarrows;": '\U000021CA', 907 | "downharpoonleft;": '\U000021C3', 908 | "downharpoonright;": '\U000021C2', 909 | "drbkarow;": '\U00002910', 910 | "drcorn;": '\U0000231F', 911 | "drcrop;": '\U0000230C', 912 | "dscr;": '\U0001D4B9', 913 | "dscy;": '\U00000455', 914 | "dsol;": '\U000029F6', 915 | "dstrok;": '\U00000111', 916 | "dtdot;": '\U000022F1', 917 | "dtri;": '\U000025BF', 918 | "dtrif;": '\U000025BE', 919 | "duarr;": '\U000021F5', 920 | "duhar;": '\U0000296F', 921 | "dwangle;": '\U000029A6', 922 | "dzcy;": '\U0000045F', 923 | "dzigrarr;": '\U000027FF', 924 | "eDDot;": '\U00002A77', 925 | "eDot;": '\U00002251', 926 | "eacute;": '\U000000E9', 927 | "easter;": '\U00002A6E', 928 | "ecaron;": '\U0000011B', 929 | "ecir;": '\U00002256', 930 | "ecirc;": '\U000000EA', 931 | "ecolon;": '\U00002255', 932 | "ecy;": '\U0000044D', 933 | "edot;": '\U00000117', 934 | "ee;": '\U00002147', 935 | 
"efDot;": '\U00002252', 936 | "efr;": '\U0001D522', 937 | "eg;": '\U00002A9A', 938 | "egrave;": '\U000000E8', 939 | "egs;": '\U00002A96', 940 | "egsdot;": '\U00002A98', 941 | "el;": '\U00002A99', 942 | "elinters;": '\U000023E7', 943 | "ell;": '\U00002113', 944 | "els;": '\U00002A95', 945 | "elsdot;": '\U00002A97', 946 | "emacr;": '\U00000113', 947 | "empty;": '\U00002205', 948 | "emptyset;": '\U00002205', 949 | "emptyv;": '\U00002205', 950 | "emsp;": '\U00002003', 951 | "emsp13;": '\U00002004', 952 | "emsp14;": '\U00002005', 953 | "eng;": '\U0000014B', 954 | "ensp;": '\U00002002', 955 | "eogon;": '\U00000119', 956 | "eopf;": '\U0001D556', 957 | "epar;": '\U000022D5', 958 | "eparsl;": '\U000029E3', 959 | "eplus;": '\U00002A71', 960 | "epsi;": '\U000003B5', 961 | "epsilon;": '\U000003B5', 962 | "epsiv;": '\U000003F5', 963 | "eqcirc;": '\U00002256', 964 | "eqcolon;": '\U00002255', 965 | "eqsim;": '\U00002242', 966 | "eqslantgtr;": '\U00002A96', 967 | "eqslantless;": '\U00002A95', 968 | "equals;": '\U0000003D', 969 | "equest;": '\U0000225F', 970 | "equiv;": '\U00002261', 971 | "equivDD;": '\U00002A78', 972 | "eqvparsl;": '\U000029E5', 973 | "erDot;": '\U00002253', 974 | "erarr;": '\U00002971', 975 | "escr;": '\U0000212F', 976 | "esdot;": '\U00002250', 977 | "esim;": '\U00002242', 978 | "eta;": '\U000003B7', 979 | "eth;": '\U000000F0', 980 | "euml;": '\U000000EB', 981 | "euro;": '\U000020AC', 982 | "excl;": '\U00000021', 983 | "exist;": '\U00002203', 984 | "expectation;": '\U00002130', 985 | "exponentiale;": '\U00002147', 986 | "fallingdotseq;": '\U00002252', 987 | "fcy;": '\U00000444', 988 | "female;": '\U00002640', 989 | "ffilig;": '\U0000FB03', 990 | "fflig;": '\U0000FB00', 991 | "ffllig;": '\U0000FB04', 992 | "ffr;": '\U0001D523', 993 | "filig;": '\U0000FB01', 994 | "flat;": '\U0000266D', 995 | "fllig;": '\U0000FB02', 996 | "fltns;": '\U000025B1', 997 | "fnof;": '\U00000192', 998 | "fopf;": '\U0001D557', 999 | "forall;": '\U00002200', 1000 | "fork;": '\U000022D4', 
1001 | "forkv;": '\U00002AD9', 1002 | "fpartint;": '\U00002A0D', 1003 | "frac12;": '\U000000BD', 1004 | "frac13;": '\U00002153', 1005 | "frac14;": '\U000000BC', 1006 | "frac15;": '\U00002155', 1007 | "frac16;": '\U00002159', 1008 | "frac18;": '\U0000215B', 1009 | "frac23;": '\U00002154', 1010 | "frac25;": '\U00002156', 1011 | "frac34;": '\U000000BE', 1012 | "frac35;": '\U00002157', 1013 | "frac38;": '\U0000215C', 1014 | "frac45;": '\U00002158', 1015 | "frac56;": '\U0000215A', 1016 | "frac58;": '\U0000215D', 1017 | "frac78;": '\U0000215E', 1018 | "frasl;": '\U00002044', 1019 | "frown;": '\U00002322', 1020 | "fscr;": '\U0001D4BB', 1021 | "gE;": '\U00002267', 1022 | "gEl;": '\U00002A8C', 1023 | "gacute;": '\U000001F5', 1024 | "gamma;": '\U000003B3', 1025 | "gammad;": '\U000003DD', 1026 | "gap;": '\U00002A86', 1027 | "gbreve;": '\U0000011F', 1028 | "gcirc;": '\U0000011D', 1029 | "gcy;": '\U00000433', 1030 | "gdot;": '\U00000121', 1031 | "ge;": '\U00002265', 1032 | "gel;": '\U000022DB', 1033 | "geq;": '\U00002265', 1034 | "geqq;": '\U00002267', 1035 | "geqslant;": '\U00002A7E', 1036 | "ges;": '\U00002A7E', 1037 | "gescc;": '\U00002AA9', 1038 | "gesdot;": '\U00002A80', 1039 | "gesdoto;": '\U00002A82', 1040 | "gesdotol;": '\U00002A84', 1041 | "gesles;": '\U00002A94', 1042 | "gfr;": '\U0001D524', 1043 | "gg;": '\U0000226B', 1044 | "ggg;": '\U000022D9', 1045 | "gimel;": '\U00002137', 1046 | "gjcy;": '\U00000453', 1047 | "gl;": '\U00002277', 1048 | "glE;": '\U00002A92', 1049 | "gla;": '\U00002AA5', 1050 | "glj;": '\U00002AA4', 1051 | "gnE;": '\U00002269', 1052 | "gnap;": '\U00002A8A', 1053 | "gnapprox;": '\U00002A8A', 1054 | "gne;": '\U00002A88', 1055 | "gneq;": '\U00002A88', 1056 | "gneqq;": '\U00002269', 1057 | "gnsim;": '\U000022E7', 1058 | "gopf;": '\U0001D558', 1059 | "grave;": '\U00000060', 1060 | "gscr;": '\U0000210A', 1061 | "gsim;": '\U00002273', 1062 | "gsime;": '\U00002A8E', 1063 | "gsiml;": '\U00002A90', 1064 | "gt;": '\U0000003E', 1065 | "gtcc;": '\U00002AA7', 
1066 | "gtcir;": '\U00002A7A', 1067 | "gtdot;": '\U000022D7', 1068 | "gtlPar;": '\U00002995', 1069 | "gtquest;": '\U00002A7C', 1070 | "gtrapprox;": '\U00002A86', 1071 | "gtrarr;": '\U00002978', 1072 | "gtrdot;": '\U000022D7', 1073 | "gtreqless;": '\U000022DB', 1074 | "gtreqqless;": '\U00002A8C', 1075 | "gtrless;": '\U00002277', 1076 | "gtrsim;": '\U00002273', 1077 | "hArr;": '\U000021D4', 1078 | "hairsp;": '\U0000200A', 1079 | "half;": '\U000000BD', 1080 | "hamilt;": '\U0000210B', 1081 | "hardcy;": '\U0000044A', 1082 | "harr;": '\U00002194', 1083 | "harrcir;": '\U00002948', 1084 | "harrw;": '\U000021AD', 1085 | "hbar;": '\U0000210F', 1086 | "hcirc;": '\U00000125', 1087 | "hearts;": '\U00002665', 1088 | "heartsuit;": '\U00002665', 1089 | "hellip;": '\U00002026', 1090 | "hercon;": '\U000022B9', 1091 | "hfr;": '\U0001D525', 1092 | "hksearow;": '\U00002925', 1093 | "hkswarow;": '\U00002926', 1094 | "hoarr;": '\U000021FF', 1095 | "homtht;": '\U0000223B', 1096 | "hookleftarrow;": '\U000021A9', 1097 | "hookrightarrow;": '\U000021AA', 1098 | "hopf;": '\U0001D559', 1099 | "horbar;": '\U00002015', 1100 | "hscr;": '\U0001D4BD', 1101 | "hslash;": '\U0000210F', 1102 | "hstrok;": '\U00000127', 1103 | "hybull;": '\U00002043', 1104 | "hyphen;": '\U00002010', 1105 | "iacute;": '\U000000ED', 1106 | "ic;": '\U00002063', 1107 | "icirc;": '\U000000EE', 1108 | "icy;": '\U00000438', 1109 | "iecy;": '\U00000435', 1110 | "iexcl;": '\U000000A1', 1111 | "iff;": '\U000021D4', 1112 | "ifr;": '\U0001D526', 1113 | "igrave;": '\U000000EC', 1114 | "ii;": '\U00002148', 1115 | "iiiint;": '\U00002A0C', 1116 | "iiint;": '\U0000222D', 1117 | "iinfin;": '\U000029DC', 1118 | "iiota;": '\U00002129', 1119 | "ijlig;": '\U00000133', 1120 | "imacr;": '\U0000012B', 1121 | "image;": '\U00002111', 1122 | "imagline;": '\U00002110', 1123 | "imagpart;": '\U00002111', 1124 | "imath;": '\U00000131', 1125 | "imof;": '\U000022B7', 1126 | "imped;": '\U000001B5', 1127 | "in;": '\U00002208', 1128 | "incare;": 
'\U00002105', 1129 | "infin;": '\U0000221E', 1130 | "infintie;": '\U000029DD', 1131 | "inodot;": '\U00000131', 1132 | "int;": '\U0000222B', 1133 | "intcal;": '\U000022BA', 1134 | "integers;": '\U00002124', 1135 | "intercal;": '\U000022BA', 1136 | "intlarhk;": '\U00002A17', 1137 | "intprod;": '\U00002A3C', 1138 | "iocy;": '\U00000451', 1139 | "iogon;": '\U0000012F', 1140 | "iopf;": '\U0001D55A', 1141 | "iota;": '\U000003B9', 1142 | "iprod;": '\U00002A3C', 1143 | "iquest;": '\U000000BF', 1144 | "iscr;": '\U0001D4BE', 1145 | "isin;": '\U00002208', 1146 | "isinE;": '\U000022F9', 1147 | "isindot;": '\U000022F5', 1148 | "isins;": '\U000022F4', 1149 | "isinsv;": '\U000022F3', 1150 | "isinv;": '\U00002208', 1151 | "it;": '\U00002062', 1152 | "itilde;": '\U00000129', 1153 | "iukcy;": '\U00000456', 1154 | "iuml;": '\U000000EF', 1155 | "jcirc;": '\U00000135', 1156 | "jcy;": '\U00000439', 1157 | "jfr;": '\U0001D527', 1158 | "jmath;": '\U00000237', 1159 | "jopf;": '\U0001D55B', 1160 | "jscr;": '\U0001D4BF', 1161 | "jsercy;": '\U00000458', 1162 | "jukcy;": '\U00000454', 1163 | "kappa;": '\U000003BA', 1164 | "kappav;": '\U000003F0', 1165 | "kcedil;": '\U00000137', 1166 | "kcy;": '\U0000043A', 1167 | "kfr;": '\U0001D528', 1168 | "kgreen;": '\U00000138', 1169 | "khcy;": '\U00000445', 1170 | "kjcy;": '\U0000045C', 1171 | "kopf;": '\U0001D55C', 1172 | "kscr;": '\U0001D4C0', 1173 | "lAarr;": '\U000021DA', 1174 | "lArr;": '\U000021D0', 1175 | "lAtail;": '\U0000291B', 1176 | "lBarr;": '\U0000290E', 1177 | "lE;": '\U00002266', 1178 | "lEg;": '\U00002A8B', 1179 | "lHar;": '\U00002962', 1180 | "lacute;": '\U0000013A', 1181 | "laemptyv;": '\U000029B4', 1182 | "lagran;": '\U00002112', 1183 | "lambda;": '\U000003BB', 1184 | "lang;": '\U000027E8', 1185 | "langd;": '\U00002991', 1186 | "langle;": '\U000027E8', 1187 | "lap;": '\U00002A85', 1188 | "laquo;": '\U000000AB', 1189 | "larr;": '\U00002190', 1190 | "larrb;": '\U000021E4', 1191 | "larrbfs;": '\U0000291F', 1192 | "larrfs;": '\U0000291D', 
1193 | "larrhk;": '\U000021A9', 1194 | "larrlp;": '\U000021AB', 1195 | "larrpl;": '\U00002939', 1196 | "larrsim;": '\U00002973', 1197 | "larrtl;": '\U000021A2', 1198 | "lat;": '\U00002AAB', 1199 | "latail;": '\U00002919', 1200 | "late;": '\U00002AAD', 1201 | "lbarr;": '\U0000290C', 1202 | "lbbrk;": '\U00002772', 1203 | "lbrace;": '\U0000007B', 1204 | "lbrack;": '\U0000005B', 1205 | "lbrke;": '\U0000298B', 1206 | "lbrksld;": '\U0000298F', 1207 | "lbrkslu;": '\U0000298D', 1208 | "lcaron;": '\U0000013E', 1209 | "lcedil;": '\U0000013C', 1210 | "lceil;": '\U00002308', 1211 | "lcub;": '\U0000007B', 1212 | "lcy;": '\U0000043B', 1213 | "ldca;": '\U00002936', 1214 | "ldquo;": '\U0000201C', 1215 | "ldquor;": '\U0000201E', 1216 | "ldrdhar;": '\U00002967', 1217 | "ldrushar;": '\U0000294B', 1218 | "ldsh;": '\U000021B2', 1219 | "le;": '\U00002264', 1220 | "leftarrow;": '\U00002190', 1221 | "leftarrowtail;": '\U000021A2', 1222 | "leftharpoondown;": '\U000021BD', 1223 | "leftharpoonup;": '\U000021BC', 1224 | "leftleftarrows;": '\U000021C7', 1225 | "leftrightarrow;": '\U00002194', 1226 | "leftrightarrows;": '\U000021C6', 1227 | "leftrightharpoons;": '\U000021CB', 1228 | "leftrightsquigarrow;": '\U000021AD', 1229 | "leftthreetimes;": '\U000022CB', 1230 | "leg;": '\U000022DA', 1231 | "leq;": '\U00002264', 1232 | "leqq;": '\U00002266', 1233 | "leqslant;": '\U00002A7D', 1234 | "les;": '\U00002A7D', 1235 | "lescc;": '\U00002AA8', 1236 | "lesdot;": '\U00002A7F', 1237 | "lesdoto;": '\U00002A81', 1238 | "lesdotor;": '\U00002A83', 1239 | "lesges;": '\U00002A93', 1240 | "lessapprox;": '\U00002A85', 1241 | "lessdot;": '\U000022D6', 1242 | "lesseqgtr;": '\U000022DA', 1243 | "lesseqqgtr;": '\U00002A8B', 1244 | "lessgtr;": '\U00002276', 1245 | "lesssim;": '\U00002272', 1246 | "lfisht;": '\U0000297C', 1247 | "lfloor;": '\U0000230A', 1248 | "lfr;": '\U0001D529', 1249 | "lg;": '\U00002276', 1250 | "lgE;": '\U00002A91', 1251 | "lhard;": '\U000021BD', 1252 | "lharu;": '\U000021BC', 1253 | "lharul;": 
'\U0000296A', 1254 | "lhblk;": '\U00002584', 1255 | "ljcy;": '\U00000459', 1256 | "ll;": '\U0000226A', 1257 | "llarr;": '\U000021C7', 1258 | "llcorner;": '\U0000231E', 1259 | "llhard;": '\U0000296B', 1260 | "lltri;": '\U000025FA', 1261 | "lmidot;": '\U00000140', 1262 | "lmoust;": '\U000023B0', 1263 | "lmoustache;": '\U000023B0', 1264 | "lnE;": '\U00002268', 1265 | "lnap;": '\U00002A89', 1266 | "lnapprox;": '\U00002A89', 1267 | "lne;": '\U00002A87', 1268 | "lneq;": '\U00002A87', 1269 | "lneqq;": '\U00002268', 1270 | "lnsim;": '\U000022E6', 1271 | "loang;": '\U000027EC', 1272 | "loarr;": '\U000021FD', 1273 | "lobrk;": '\U000027E6', 1274 | "longleftarrow;": '\U000027F5', 1275 | "longleftrightarrow;": '\U000027F7', 1276 | "longmapsto;": '\U000027FC', 1277 | "longrightarrow;": '\U000027F6', 1278 | "looparrowleft;": '\U000021AB', 1279 | "looparrowright;": '\U000021AC', 1280 | "lopar;": '\U00002985', 1281 | "lopf;": '\U0001D55D', 1282 | "loplus;": '\U00002A2D', 1283 | "lotimes;": '\U00002A34', 1284 | "lowast;": '\U00002217', 1285 | "lowbar;": '\U0000005F', 1286 | "loz;": '\U000025CA', 1287 | "lozenge;": '\U000025CA', 1288 | "lozf;": '\U000029EB', 1289 | "lpar;": '\U00000028', 1290 | "lparlt;": '\U00002993', 1291 | "lrarr;": '\U000021C6', 1292 | "lrcorner;": '\U0000231F', 1293 | "lrhar;": '\U000021CB', 1294 | "lrhard;": '\U0000296D', 1295 | "lrm;": '\U0000200E', 1296 | "lrtri;": '\U000022BF', 1297 | "lsaquo;": '\U00002039', 1298 | "lscr;": '\U0001D4C1', 1299 | "lsh;": '\U000021B0', 1300 | "lsim;": '\U00002272', 1301 | "lsime;": '\U00002A8D', 1302 | "lsimg;": '\U00002A8F', 1303 | "lsqb;": '\U0000005B', 1304 | "lsquo;": '\U00002018', 1305 | "lsquor;": '\U0000201A', 1306 | "lstrok;": '\U00000142', 1307 | "lt;": '\U0000003C', 1308 | "ltcc;": '\U00002AA6', 1309 | "ltcir;": '\U00002A79', 1310 | "ltdot;": '\U000022D6', 1311 | "lthree;": '\U000022CB', 1312 | "ltimes;": '\U000022C9', 1313 | "ltlarr;": '\U00002976', 1314 | "ltquest;": '\U00002A7B', 1315 | "ltrPar;": '\U00002996', 
1316 | "ltri;": '\U000025C3', 1317 | "ltrie;": '\U000022B4', 1318 | "ltrif;": '\U000025C2', 1319 | "lurdshar;": '\U0000294A', 1320 | "luruhar;": '\U00002966', 1321 | "mDDot;": '\U0000223A', 1322 | "macr;": '\U000000AF', 1323 | "male;": '\U00002642', 1324 | "malt;": '\U00002720', 1325 | "maltese;": '\U00002720', 1326 | "map;": '\U000021A6', 1327 | "mapsto;": '\U000021A6', 1328 | "mapstodown;": '\U000021A7', 1329 | "mapstoleft;": '\U000021A4', 1330 | "mapstoup;": '\U000021A5', 1331 | "marker;": '\U000025AE', 1332 | "mcomma;": '\U00002A29', 1333 | "mcy;": '\U0000043C', 1334 | "mdash;": '\U00002014', 1335 | "measuredangle;": '\U00002221', 1336 | "mfr;": '\U0001D52A', 1337 | "mho;": '\U00002127', 1338 | "micro;": '\U000000B5', 1339 | "mid;": '\U00002223', 1340 | "midast;": '\U0000002A', 1341 | "midcir;": '\U00002AF0', 1342 | "middot;": '\U000000B7', 1343 | "minus;": '\U00002212', 1344 | "minusb;": '\U0000229F', 1345 | "minusd;": '\U00002238', 1346 | "minusdu;": '\U00002A2A', 1347 | "mlcp;": '\U00002ADB', 1348 | "mldr;": '\U00002026', 1349 | "mnplus;": '\U00002213', 1350 | "models;": '\U000022A7', 1351 | "mopf;": '\U0001D55E', 1352 | "mp;": '\U00002213', 1353 | "mscr;": '\U0001D4C2', 1354 | "mstpos;": '\U0000223E', 1355 | "mu;": '\U000003BC', 1356 | "multimap;": '\U000022B8', 1357 | "mumap;": '\U000022B8', 1358 | "nLeftarrow;": '\U000021CD', 1359 | "nLeftrightarrow;": '\U000021CE', 1360 | "nRightarrow;": '\U000021CF', 1361 | "nVDash;": '\U000022AF', 1362 | "nVdash;": '\U000022AE', 1363 | "nabla;": '\U00002207', 1364 | "nacute;": '\U00000144', 1365 | "nap;": '\U00002249', 1366 | "napos;": '\U00000149', 1367 | "napprox;": '\U00002249', 1368 | "natur;": '\U0000266E', 1369 | "natural;": '\U0000266E', 1370 | "naturals;": '\U00002115', 1371 | "nbsp;": '\U000000A0', 1372 | "ncap;": '\U00002A43', 1373 | "ncaron;": '\U00000148', 1374 | "ncedil;": '\U00000146', 1375 | "ncong;": '\U00002247', 1376 | "ncup;": '\U00002A42', 1377 | "ncy;": '\U0000043D', 1378 | "ndash;": '\U00002013', 
1379 | "ne;": '\U00002260', 1380 | "neArr;": '\U000021D7', 1381 | "nearhk;": '\U00002924', 1382 | "nearr;": '\U00002197', 1383 | "nearrow;": '\U00002197', 1384 | "nequiv;": '\U00002262', 1385 | "nesear;": '\U00002928', 1386 | "nexist;": '\U00002204', 1387 | "nexists;": '\U00002204', 1388 | "nfr;": '\U0001D52B', 1389 | "nge;": '\U00002271', 1390 | "ngeq;": '\U00002271', 1391 | "ngsim;": '\U00002275', 1392 | "ngt;": '\U0000226F', 1393 | "ngtr;": '\U0000226F', 1394 | "nhArr;": '\U000021CE', 1395 | "nharr;": '\U000021AE', 1396 | "nhpar;": '\U00002AF2', 1397 | "ni;": '\U0000220B', 1398 | "nis;": '\U000022FC', 1399 | "nisd;": '\U000022FA', 1400 | "niv;": '\U0000220B', 1401 | "njcy;": '\U0000045A', 1402 | "nlArr;": '\U000021CD', 1403 | "nlarr;": '\U0000219A', 1404 | "nldr;": '\U00002025', 1405 | "nle;": '\U00002270', 1406 | "nleftarrow;": '\U0000219A', 1407 | "nleftrightarrow;": '\U000021AE', 1408 | "nleq;": '\U00002270', 1409 | "nless;": '\U0000226E', 1410 | "nlsim;": '\U00002274', 1411 | "nlt;": '\U0000226E', 1412 | "nltri;": '\U000022EA', 1413 | "nltrie;": '\U000022EC', 1414 | "nmid;": '\U00002224', 1415 | "nopf;": '\U0001D55F', 1416 | "not;": '\U000000AC', 1417 | "notin;": '\U00002209', 1418 | "notinva;": '\U00002209', 1419 | "notinvb;": '\U000022F7', 1420 | "notinvc;": '\U000022F6', 1421 | "notni;": '\U0000220C', 1422 | "notniva;": '\U0000220C', 1423 | "notnivb;": '\U000022FE', 1424 | "notnivc;": '\U000022FD', 1425 | "npar;": '\U00002226', 1426 | "nparallel;": '\U00002226', 1427 | "npolint;": '\U00002A14', 1428 | "npr;": '\U00002280', 1429 | "nprcue;": '\U000022E0', 1430 | "nprec;": '\U00002280', 1431 | "nrArr;": '\U000021CF', 1432 | "nrarr;": '\U0000219B', 1433 | "nrightarrow;": '\U0000219B', 1434 | "nrtri;": '\U000022EB', 1435 | "nrtrie;": '\U000022ED', 1436 | "nsc;": '\U00002281', 1437 | "nsccue;": '\U000022E1', 1438 | "nscr;": '\U0001D4C3', 1439 | "nshortmid;": '\U00002224', 1440 | "nshortparallel;": '\U00002226', 1441 | "nsim;": '\U00002241', 1442 | "nsime;": 
'\U00002244', 1443 | "nsimeq;": '\U00002244', 1444 | "nsmid;": '\U00002224', 1445 | "nspar;": '\U00002226', 1446 | "nsqsube;": '\U000022E2', 1447 | "nsqsupe;": '\U000022E3', 1448 | "nsub;": '\U00002284', 1449 | "nsube;": '\U00002288', 1450 | "nsubseteq;": '\U00002288', 1451 | "nsucc;": '\U00002281', 1452 | "nsup;": '\U00002285', 1453 | "nsupe;": '\U00002289', 1454 | "nsupseteq;": '\U00002289', 1455 | "ntgl;": '\U00002279', 1456 | "ntilde;": '\U000000F1', 1457 | "ntlg;": '\U00002278', 1458 | "ntriangleleft;": '\U000022EA', 1459 | "ntrianglelefteq;": '\U000022EC', 1460 | "ntriangleright;": '\U000022EB', 1461 | "ntrianglerighteq;": '\U000022ED', 1462 | "nu;": '\U000003BD', 1463 | "num;": '\U00000023', 1464 | "numero;": '\U00002116', 1465 | "numsp;": '\U00002007', 1466 | "nvDash;": '\U000022AD', 1467 | "nvHarr;": '\U00002904', 1468 | "nvdash;": '\U000022AC', 1469 | "nvinfin;": '\U000029DE', 1470 | "nvlArr;": '\U00002902', 1471 | "nvrArr;": '\U00002903', 1472 | "nwArr;": '\U000021D6', 1473 | "nwarhk;": '\U00002923', 1474 | "nwarr;": '\U00002196', 1475 | "nwarrow;": '\U00002196', 1476 | "nwnear;": '\U00002927', 1477 | "oS;": '\U000024C8', 1478 | "oacute;": '\U000000F3', 1479 | "oast;": '\U0000229B', 1480 | "ocir;": '\U0000229A', 1481 | "ocirc;": '\U000000F4', 1482 | "ocy;": '\U0000043E', 1483 | "odash;": '\U0000229D', 1484 | "odblac;": '\U00000151', 1485 | "odiv;": '\U00002A38', 1486 | "odot;": '\U00002299', 1487 | "odsold;": '\U000029BC', 1488 | "oelig;": '\U00000153', 1489 | "ofcir;": '\U000029BF', 1490 | "ofr;": '\U0001D52C', 1491 | "ogon;": '\U000002DB', 1492 | "ograve;": '\U000000F2', 1493 | "ogt;": '\U000029C1', 1494 | "ohbar;": '\U000029B5', 1495 | "ohm;": '\U000003A9', 1496 | "oint;": '\U0000222E', 1497 | "olarr;": '\U000021BA', 1498 | "olcir;": '\U000029BE', 1499 | "olcross;": '\U000029BB', 1500 | "oline;": '\U0000203E', 1501 | "olt;": '\U000029C0', 1502 | "omacr;": '\U0000014D', 1503 | "omega;": '\U000003C9', 1504 | "omicron;": '\U000003BF', 1505 | "omid;": 
'\U000029B6', 1506 | "ominus;": '\U00002296', 1507 | "oopf;": '\U0001D560', 1508 | "opar;": '\U000029B7', 1509 | "operp;": '\U000029B9', 1510 | "oplus;": '\U00002295', 1511 | "or;": '\U00002228', 1512 | "orarr;": '\U000021BB', 1513 | "ord;": '\U00002A5D', 1514 | "order;": '\U00002134', 1515 | "orderof;": '\U00002134', 1516 | "ordf;": '\U000000AA', 1517 | "ordm;": '\U000000BA', 1518 | "origof;": '\U000022B6', 1519 | "oror;": '\U00002A56', 1520 | "orslope;": '\U00002A57', 1521 | "orv;": '\U00002A5B', 1522 | "oscr;": '\U00002134', 1523 | "oslash;": '\U000000F8', 1524 | "osol;": '\U00002298', 1525 | "otilde;": '\U000000F5', 1526 | "otimes;": '\U00002297', 1527 | "otimesas;": '\U00002A36', 1528 | "ouml;": '\U000000F6', 1529 | "ovbar;": '\U0000233D', 1530 | "par;": '\U00002225', 1531 | "para;": '\U000000B6', 1532 | "parallel;": '\U00002225', 1533 | "parsim;": '\U00002AF3', 1534 | "parsl;": '\U00002AFD', 1535 | "part;": '\U00002202', 1536 | "pcy;": '\U0000043F', 1537 | "percnt;": '\U00000025', 1538 | "period;": '\U0000002E', 1539 | "permil;": '\U00002030', 1540 | "perp;": '\U000022A5', 1541 | "pertenk;": '\U00002031', 1542 | "pfr;": '\U0001D52D', 1543 | "phi;": '\U000003C6', 1544 | "phiv;": '\U000003D5', 1545 | "phmmat;": '\U00002133', 1546 | "phone;": '\U0000260E', 1547 | "pi;": '\U000003C0', 1548 | "pitchfork;": '\U000022D4', 1549 | "piv;": '\U000003D6', 1550 | "planck;": '\U0000210F', 1551 | "planckh;": '\U0000210E', 1552 | "plankv;": '\U0000210F', 1553 | "plus;": '\U0000002B', 1554 | "plusacir;": '\U00002A23', 1555 | "plusb;": '\U0000229E', 1556 | "pluscir;": '\U00002A22', 1557 | "plusdo;": '\U00002214', 1558 | "plusdu;": '\U00002A25', 1559 | "pluse;": '\U00002A72', 1560 | "plusmn;": '\U000000B1', 1561 | "plussim;": '\U00002A26', 1562 | "plustwo;": '\U00002A27', 1563 | "pm;": '\U000000B1', 1564 | "pointint;": '\U00002A15', 1565 | "popf;": '\U0001D561', 1566 | "pound;": '\U000000A3', 1567 | "pr;": '\U0000227A', 1568 | "prE;": '\U00002AB3', 1569 | "prap;": '\U00002AB7', 
1570 | "prcue;": '\U0000227C', 1571 | "pre;": '\U00002AAF', 1572 | "prec;": '\U0000227A', 1573 | "precapprox;": '\U00002AB7', 1574 | "preccurlyeq;": '\U0000227C', 1575 | "preceq;": '\U00002AAF', 1576 | "precnapprox;": '\U00002AB9', 1577 | "precneqq;": '\U00002AB5', 1578 | "precnsim;": '\U000022E8', 1579 | "precsim;": '\U0000227E', 1580 | "prime;": '\U00002032', 1581 | "primes;": '\U00002119', 1582 | "prnE;": '\U00002AB5', 1583 | "prnap;": '\U00002AB9', 1584 | "prnsim;": '\U000022E8', 1585 | "prod;": '\U0000220F', 1586 | "profalar;": '\U0000232E', 1587 | "profline;": '\U00002312', 1588 | "profsurf;": '\U00002313', 1589 | "prop;": '\U0000221D', 1590 | "propto;": '\U0000221D', 1591 | "prsim;": '\U0000227E', 1592 | "prurel;": '\U000022B0', 1593 | "pscr;": '\U0001D4C5', 1594 | "psi;": '\U000003C8', 1595 | "puncsp;": '\U00002008', 1596 | "qfr;": '\U0001D52E', 1597 | "qint;": '\U00002A0C', 1598 | "qopf;": '\U0001D562', 1599 | "qprime;": '\U00002057', 1600 | "qscr;": '\U0001D4C6', 1601 | "quaternions;": '\U0000210D', 1602 | "quatint;": '\U00002A16', 1603 | "quest;": '\U0000003F', 1604 | "questeq;": '\U0000225F', 1605 | "quot;": '\U00000022', 1606 | "rAarr;": '\U000021DB', 1607 | "rArr;": '\U000021D2', 1608 | "rAtail;": '\U0000291C', 1609 | "rBarr;": '\U0000290F', 1610 | "rHar;": '\U00002964', 1611 | "racute;": '\U00000155', 1612 | "radic;": '\U0000221A', 1613 | "raemptyv;": '\U000029B3', 1614 | "rang;": '\U000027E9', 1615 | "rangd;": '\U00002992', 1616 | "range;": '\U000029A5', 1617 | "rangle;": '\U000027E9', 1618 | "raquo;": '\U000000BB', 1619 | "rarr;": '\U00002192', 1620 | "rarrap;": '\U00002975', 1621 | "rarrb;": '\U000021E5', 1622 | "rarrbfs;": '\U00002920', 1623 | "rarrc;": '\U00002933', 1624 | "rarrfs;": '\U0000291E', 1625 | "rarrhk;": '\U000021AA', 1626 | "rarrlp;": '\U000021AC', 1627 | "rarrpl;": '\U00002945', 1628 | "rarrsim;": '\U00002974', 1629 | "rarrtl;": '\U000021A3', 1630 | "rarrw;": '\U0000219D', 1631 | "ratail;": '\U0000291A', 1632 | "ratio;": 
'\U00002236', 1633 | "rationals;": '\U0000211A', 1634 | "rbarr;": '\U0000290D', 1635 | "rbbrk;": '\U00002773', 1636 | "rbrace;": '\U0000007D', 1637 | "rbrack;": '\U0000005D', 1638 | "rbrke;": '\U0000298C', 1639 | "rbrksld;": '\U0000298E', 1640 | "rbrkslu;": '\U00002990', 1641 | "rcaron;": '\U00000159', 1642 | "rcedil;": '\U00000157', 1643 | "rceil;": '\U00002309', 1644 | "rcub;": '\U0000007D', 1645 | "rcy;": '\U00000440', 1646 | "rdca;": '\U00002937', 1647 | "rdldhar;": '\U00002969', 1648 | "rdquo;": '\U0000201D', 1649 | "rdquor;": '\U0000201D', 1650 | "rdsh;": '\U000021B3', 1651 | "real;": '\U0000211C', 1652 | "realine;": '\U0000211B', 1653 | "realpart;": '\U0000211C', 1654 | "reals;": '\U0000211D', 1655 | "rect;": '\U000025AD', 1656 | "reg;": '\U000000AE', 1657 | "rfisht;": '\U0000297D', 1658 | "rfloor;": '\U0000230B', 1659 | "rfr;": '\U0001D52F', 1660 | "rhard;": '\U000021C1', 1661 | "rharu;": '\U000021C0', 1662 | "rharul;": '\U0000296C', 1663 | "rho;": '\U000003C1', 1664 | "rhov;": '\U000003F1', 1665 | "rightarrow;": '\U00002192', 1666 | "rightarrowtail;": '\U000021A3', 1667 | "rightharpoondown;": '\U000021C1', 1668 | "rightharpoonup;": '\U000021C0', 1669 | "rightleftarrows;": '\U000021C4', 1670 | "rightleftharpoons;": '\U000021CC', 1671 | "rightrightarrows;": '\U000021C9', 1672 | "rightsquigarrow;": '\U0000219D', 1673 | "rightthreetimes;": '\U000022CC', 1674 | "ring;": '\U000002DA', 1675 | "risingdotseq;": '\U00002253', 1676 | "rlarr;": '\U000021C4', 1677 | "rlhar;": '\U000021CC', 1678 | "rlm;": '\U0000200F', 1679 | "rmoust;": '\U000023B1', 1680 | "rmoustache;": '\U000023B1', 1681 | "rnmid;": '\U00002AEE', 1682 | "roang;": '\U000027ED', 1683 | "roarr;": '\U000021FE', 1684 | "robrk;": '\U000027E7', 1685 | "ropar;": '\U00002986', 1686 | "ropf;": '\U0001D563', 1687 | "roplus;": '\U00002A2E', 1688 | "rotimes;": '\U00002A35', 1689 | "rpar;": '\U00000029', 1690 | "rpargt;": '\U00002994', 1691 | "rppolint;": '\U00002A12', 1692 | "rrarr;": '\U000021C9', 1693 | 
"rsaquo;": '\U0000203A', 1694 | "rscr;": '\U0001D4C7', 1695 | "rsh;": '\U000021B1', 1696 | "rsqb;": '\U0000005D', 1697 | "rsquo;": '\U00002019', 1698 | "rsquor;": '\U00002019', 1699 | "rthree;": '\U000022CC', 1700 | "rtimes;": '\U000022CA', 1701 | "rtri;": '\U000025B9', 1702 | "rtrie;": '\U000022B5', 1703 | "rtrif;": '\U000025B8', 1704 | "rtriltri;": '\U000029CE', 1705 | "ruluhar;": '\U00002968', 1706 | "rx;": '\U0000211E', 1707 | "sacute;": '\U0000015B', 1708 | "sbquo;": '\U0000201A', 1709 | "sc;": '\U0000227B', 1710 | "scE;": '\U00002AB4', 1711 | "scap;": '\U00002AB8', 1712 | "scaron;": '\U00000161', 1713 | "sccue;": '\U0000227D', 1714 | "sce;": '\U00002AB0', 1715 | "scedil;": '\U0000015F', 1716 | "scirc;": '\U0000015D', 1717 | "scnE;": '\U00002AB6', 1718 | "scnap;": '\U00002ABA', 1719 | "scnsim;": '\U000022E9', 1720 | "scpolint;": '\U00002A13', 1721 | "scsim;": '\U0000227F', 1722 | "scy;": '\U00000441', 1723 | "sdot;": '\U000022C5', 1724 | "sdotb;": '\U000022A1', 1725 | "sdote;": '\U00002A66', 1726 | "seArr;": '\U000021D8', 1727 | "searhk;": '\U00002925', 1728 | "searr;": '\U00002198', 1729 | "searrow;": '\U00002198', 1730 | "sect;": '\U000000A7', 1731 | "semi;": '\U0000003B', 1732 | "seswar;": '\U00002929', 1733 | "setminus;": '\U00002216', 1734 | "setmn;": '\U00002216', 1735 | "sext;": '\U00002736', 1736 | "sfr;": '\U0001D530', 1737 | "sfrown;": '\U00002322', 1738 | "sharp;": '\U0000266F', 1739 | "shchcy;": '\U00000449', 1740 | "shcy;": '\U00000448', 1741 | "shortmid;": '\U00002223', 1742 | "shortparallel;": '\U00002225', 1743 | "shy;": '\U000000AD', 1744 | "sigma;": '\U000003C3', 1745 | "sigmaf;": '\U000003C2', 1746 | "sigmav;": '\U000003C2', 1747 | "sim;": '\U0000223C', 1748 | "simdot;": '\U00002A6A', 1749 | "sime;": '\U00002243', 1750 | "simeq;": '\U00002243', 1751 | "simg;": '\U00002A9E', 1752 | "simgE;": '\U00002AA0', 1753 | "siml;": '\U00002A9D', 1754 | "simlE;": '\U00002A9F', 1755 | "simne;": '\U00002246', 1756 | "simplus;": '\U00002A24', 1757 | 
"simrarr;": '\U00002972', 1758 | "slarr;": '\U00002190', 1759 | "smallsetminus;": '\U00002216', 1760 | "smashp;": '\U00002A33', 1761 | "smeparsl;": '\U000029E4', 1762 | "smid;": '\U00002223', 1763 | "smile;": '\U00002323', 1764 | "smt;": '\U00002AAA', 1765 | "smte;": '\U00002AAC', 1766 | "softcy;": '\U0000044C', 1767 | "sol;": '\U0000002F', 1768 | "solb;": '\U000029C4', 1769 | "solbar;": '\U0000233F', 1770 | "sopf;": '\U0001D564', 1771 | "spades;": '\U00002660', 1772 | "spadesuit;": '\U00002660', 1773 | "spar;": '\U00002225', 1774 | "sqcap;": '\U00002293', 1775 | "sqcup;": '\U00002294', 1776 | "sqsub;": '\U0000228F', 1777 | "sqsube;": '\U00002291', 1778 | "sqsubset;": '\U0000228F', 1779 | "sqsubseteq;": '\U00002291', 1780 | "sqsup;": '\U00002290', 1781 | "sqsupe;": '\U00002292', 1782 | "sqsupset;": '\U00002290', 1783 | "sqsupseteq;": '\U00002292', 1784 | "squ;": '\U000025A1', 1785 | "square;": '\U000025A1', 1786 | "squarf;": '\U000025AA', 1787 | "squf;": '\U000025AA', 1788 | "srarr;": '\U00002192', 1789 | "sscr;": '\U0001D4C8', 1790 | "ssetmn;": '\U00002216', 1791 | "ssmile;": '\U00002323', 1792 | "sstarf;": '\U000022C6', 1793 | "star;": '\U00002606', 1794 | "starf;": '\U00002605', 1795 | "straightepsilon;": '\U000003F5', 1796 | "straightphi;": '\U000003D5', 1797 | "strns;": '\U000000AF', 1798 | "sub;": '\U00002282', 1799 | "subE;": '\U00002AC5', 1800 | "subdot;": '\U00002ABD', 1801 | "sube;": '\U00002286', 1802 | "subedot;": '\U00002AC3', 1803 | "submult;": '\U00002AC1', 1804 | "subnE;": '\U00002ACB', 1805 | "subne;": '\U0000228A', 1806 | "subplus;": '\U00002ABF', 1807 | "subrarr;": '\U00002979', 1808 | "subset;": '\U00002282', 1809 | "subseteq;": '\U00002286', 1810 | "subseteqq;": '\U00002AC5', 1811 | "subsetneq;": '\U0000228A', 1812 | "subsetneqq;": '\U00002ACB', 1813 | "subsim;": '\U00002AC7', 1814 | "subsub;": '\U00002AD5', 1815 | "subsup;": '\U00002AD3', 1816 | "succ;": '\U0000227B', 1817 | "succapprox;": '\U00002AB8', 1818 | "succcurlyeq;": '\U0000227D', 
1819 | "succeq;": '\U00002AB0', 1820 | "succnapprox;": '\U00002ABA', 1821 | "succneqq;": '\U00002AB6', 1822 | "succnsim;": '\U000022E9', 1823 | "succsim;": '\U0000227F', 1824 | "sum;": '\U00002211', 1825 | "sung;": '\U0000266A', 1826 | "sup;": '\U00002283', 1827 | "sup1;": '\U000000B9', 1828 | "sup2;": '\U000000B2', 1829 | "sup3;": '\U000000B3', 1830 | "supE;": '\U00002AC6', 1831 | "supdot;": '\U00002ABE', 1832 | "supdsub;": '\U00002AD8', 1833 | "supe;": '\U00002287', 1834 | "supedot;": '\U00002AC4', 1835 | "suphsol;": '\U000027C9', 1836 | "suphsub;": '\U00002AD7', 1837 | "suplarr;": '\U0000297B', 1838 | "supmult;": '\U00002AC2', 1839 | "supnE;": '\U00002ACC', 1840 | "supne;": '\U0000228B', 1841 | "supplus;": '\U00002AC0', 1842 | "supset;": '\U00002283', 1843 | "supseteq;": '\U00002287', 1844 | "supseteqq;": '\U00002AC6', 1845 | "supsetneq;": '\U0000228B', 1846 | "supsetneqq;": '\U00002ACC', 1847 | "supsim;": '\U00002AC8', 1848 | "supsub;": '\U00002AD4', 1849 | "supsup;": '\U00002AD6', 1850 | "swArr;": '\U000021D9', 1851 | "swarhk;": '\U00002926', 1852 | "swarr;": '\U00002199', 1853 | "swarrow;": '\U00002199', 1854 | "swnwar;": '\U0000292A', 1855 | "szlig;": '\U000000DF', 1856 | "target;": '\U00002316', 1857 | "tau;": '\U000003C4', 1858 | "tbrk;": '\U000023B4', 1859 | "tcaron;": '\U00000165', 1860 | "tcedil;": '\U00000163', 1861 | "tcy;": '\U00000442', 1862 | "tdot;": '\U000020DB', 1863 | "telrec;": '\U00002315', 1864 | "tfr;": '\U0001D531', 1865 | "there4;": '\U00002234', 1866 | "therefore;": '\U00002234', 1867 | "theta;": '\U000003B8', 1868 | "thetasym;": '\U000003D1', 1869 | "thetav;": '\U000003D1', 1870 | "thickapprox;": '\U00002248', 1871 | "thicksim;": '\U0000223C', 1872 | "thinsp;": '\U00002009', 1873 | "thkap;": '\U00002248', 1874 | "thksim;": '\U0000223C', 1875 | "thorn;": '\U000000FE', 1876 | "tilde;": '\U000002DC', 1877 | "times;": '\U000000D7', 1878 | "timesb;": '\U000022A0', 1879 | "timesbar;": '\U00002A31', 1880 | "timesd;": '\U00002A30', 1881 | 
"tint;": '\U0000222D', 1882 | "toea;": '\U00002928', 1883 | "top;": '\U000022A4', 1884 | "topbot;": '\U00002336', 1885 | "topcir;": '\U00002AF1', 1886 | "topf;": '\U0001D565', 1887 | "topfork;": '\U00002ADA', 1888 | "tosa;": '\U00002929', 1889 | "tprime;": '\U00002034', 1890 | "trade;": '\U00002122', 1891 | "triangle;": '\U000025B5', 1892 | "triangledown;": '\U000025BF', 1893 | "triangleleft;": '\U000025C3', 1894 | "trianglelefteq;": '\U000022B4', 1895 | "triangleq;": '\U0000225C', 1896 | "triangleright;": '\U000025B9', 1897 | "trianglerighteq;": '\U000022B5', 1898 | "tridot;": '\U000025EC', 1899 | "trie;": '\U0000225C', 1900 | "triminus;": '\U00002A3A', 1901 | "triplus;": '\U00002A39', 1902 | "trisb;": '\U000029CD', 1903 | "tritime;": '\U00002A3B', 1904 | "trpezium;": '\U000023E2', 1905 | "tscr;": '\U0001D4C9', 1906 | "tscy;": '\U00000446', 1907 | "tshcy;": '\U0000045B', 1908 | "tstrok;": '\U00000167', 1909 | "twixt;": '\U0000226C', 1910 | "twoheadleftarrow;": '\U0000219E', 1911 | "twoheadrightarrow;": '\U000021A0', 1912 | "uArr;": '\U000021D1', 1913 | "uHar;": '\U00002963', 1914 | "uacute;": '\U000000FA', 1915 | "uarr;": '\U00002191', 1916 | "ubrcy;": '\U0000045E', 1917 | "ubreve;": '\U0000016D', 1918 | "ucirc;": '\U000000FB', 1919 | "ucy;": '\U00000443', 1920 | "udarr;": '\U000021C5', 1921 | "udblac;": '\U00000171', 1922 | "udhar;": '\U0000296E', 1923 | "ufisht;": '\U0000297E', 1924 | "ufr;": '\U0001D532', 1925 | "ugrave;": '\U000000F9', 1926 | "uharl;": '\U000021BF', 1927 | "uharr;": '\U000021BE', 1928 | "uhblk;": '\U00002580', 1929 | "ulcorn;": '\U0000231C', 1930 | "ulcorner;": '\U0000231C', 1931 | "ulcrop;": '\U0000230F', 1932 | "ultri;": '\U000025F8', 1933 | "umacr;": '\U0000016B', 1934 | "uml;": '\U000000A8', 1935 | "uogon;": '\U00000173', 1936 | "uopf;": '\U0001D566', 1937 | "uparrow;": '\U00002191', 1938 | "updownarrow;": '\U00002195', 1939 | "upharpoonleft;": '\U000021BF', 1940 | "upharpoonright;": '\U000021BE', 1941 | "uplus;": '\U0000228E', 1942 | 
"upsi;": '\U000003C5', 1943 | "upsih;": '\U000003D2', 1944 | "upsilon;": '\U000003C5', 1945 | "upuparrows;": '\U000021C8', 1946 | "urcorn;": '\U0000231D', 1947 | "urcorner;": '\U0000231D', 1948 | "urcrop;": '\U0000230E', 1949 | "uring;": '\U0000016F', 1950 | "urtri;": '\U000025F9', 1951 | "uscr;": '\U0001D4CA', 1952 | "utdot;": '\U000022F0', 1953 | "utilde;": '\U00000169', 1954 | "utri;": '\U000025B5', 1955 | "utrif;": '\U000025B4', 1956 | "uuarr;": '\U000021C8', 1957 | "uuml;": '\U000000FC', 1958 | "uwangle;": '\U000029A7', 1959 | "vArr;": '\U000021D5', 1960 | "vBar;": '\U00002AE8', 1961 | "vBarv;": '\U00002AE9', 1962 | "vDash;": '\U000022A8', 1963 | "vangrt;": '\U0000299C', 1964 | "varepsilon;": '\U000003F5', 1965 | "varkappa;": '\U000003F0', 1966 | "varnothing;": '\U00002205', 1967 | "varphi;": '\U000003D5', 1968 | "varpi;": '\U000003D6', 1969 | "varpropto;": '\U0000221D', 1970 | "varr;": '\U00002195', 1971 | "varrho;": '\U000003F1', 1972 | "varsigma;": '\U000003C2', 1973 | "vartheta;": '\U000003D1', 1974 | "vartriangleleft;": '\U000022B2', 1975 | "vartriangleright;": '\U000022B3', 1976 | "vcy;": '\U00000432', 1977 | "vdash;": '\U000022A2', 1978 | "vee;": '\U00002228', 1979 | "veebar;": '\U000022BB', 1980 | "veeeq;": '\U0000225A', 1981 | "vellip;": '\U000022EE', 1982 | "verbar;": '\U0000007C', 1983 | "vert;": '\U0000007C', 1984 | "vfr;": '\U0001D533', 1985 | "vltri;": '\U000022B2', 1986 | "vopf;": '\U0001D567', 1987 | "vprop;": '\U0000221D', 1988 | "vrtri;": '\U000022B3', 1989 | "vscr;": '\U0001D4CB', 1990 | "vzigzag;": '\U0000299A', 1991 | "wcirc;": '\U00000175', 1992 | "wedbar;": '\U00002A5F', 1993 | "wedge;": '\U00002227', 1994 | "wedgeq;": '\U00002259', 1995 | "weierp;": '\U00002118', 1996 | "wfr;": '\U0001D534', 1997 | "wopf;": '\U0001D568', 1998 | "wp;": '\U00002118', 1999 | "wr;": '\U00002240', 2000 | "wreath;": '\U00002240', 2001 | "wscr;": '\U0001D4CC', 2002 | "xcap;": '\U000022C2', 2003 | "xcirc;": '\U000025EF', 2004 | "xcup;": '\U000022C3', 2005 | 
"xdtri;": '\U000025BD', 2006 | "xfr;": '\U0001D535', 2007 | "xhArr;": '\U000027FA', 2008 | "xharr;": '\U000027F7', 2009 | "xi;": '\U000003BE', 2010 | "xlArr;": '\U000027F8', 2011 | "xlarr;": '\U000027F5', 2012 | "xmap;": '\U000027FC', 2013 | "xnis;": '\U000022FB', 2014 | "xodot;": '\U00002A00', 2015 | "xopf;": '\U0001D569', 2016 | "xoplus;": '\U00002A01', 2017 | "xotime;": '\U00002A02', 2018 | "xrArr;": '\U000027F9', 2019 | "xrarr;": '\U000027F6', 2020 | "xscr;": '\U0001D4CD', 2021 | "xsqcup;": '\U00002A06', 2022 | "xuplus;": '\U00002A04', 2023 | "xutri;": '\U000025B3', 2024 | "xvee;": '\U000022C1', 2025 | "xwedge;": '\U000022C0', 2026 | "yacute;": '\U000000FD', 2027 | "yacy;": '\U0000044F', 2028 | "ycirc;": '\U00000177', 2029 | "ycy;": '\U0000044B', 2030 | "yen;": '\U000000A5', 2031 | "yfr;": '\U0001D536', 2032 | "yicy;": '\U00000457', 2033 | "yopf;": '\U0001D56A', 2034 | "yscr;": '\U0001D4CE', 2035 | "yucy;": '\U0000044E', 2036 | "yuml;": '\U000000FF', 2037 | "zacute;": '\U0000017A', 2038 | "zcaron;": '\U0000017E', 2039 | "zcy;": '\U00000437', 2040 | "zdot;": '\U0000017C', 2041 | "zeetrf;": '\U00002128', 2042 | "zeta;": '\U000003B6', 2043 | "zfr;": '\U0001D537', 2044 | "zhcy;": '\U00000436', 2045 | "zigrarr;": '\U000021DD', 2046 | "zopf;": '\U0001D56B', 2047 | "zscr;": '\U0001D4CF', 2048 | "zwj;": '\U0000200D', 2049 | "zwnj;": '\U0000200C', 2050 | "AElig": '\U000000C6', 2051 | "AMP": '\U00000026', 2052 | "Aacute": '\U000000C1', 2053 | "Acirc": '\U000000C2', 2054 | "Agrave": '\U000000C0', 2055 | "Aring": '\U000000C5', 2056 | "Atilde": '\U000000C3', 2057 | "Auml": '\U000000C4', 2058 | "COPY": '\U000000A9', 2059 | "Ccedil": '\U000000C7', 2060 | "ETH": '\U000000D0', 2061 | "Eacute": '\U000000C9', 2062 | "Ecirc": '\U000000CA', 2063 | "Egrave": '\U000000C8', 2064 | "Euml": '\U000000CB', 2065 | "GT": '\U0000003E', 2066 | "Iacute": '\U000000CD', 2067 | "Icirc": '\U000000CE', 2068 | "Igrave": '\U000000CC', 2069 | "Iuml": '\U000000CF', 2070 | "LT": '\U0000003C', 2071 | 
"Ntilde": '\U000000D1', 2072 | "Oacute": '\U000000D3', 2073 | "Ocirc": '\U000000D4', 2074 | "Ograve": '\U000000D2', 2075 | "Oslash": '\U000000D8', 2076 | "Otilde": '\U000000D5', 2077 | "Ouml": '\U000000D6', 2078 | "QUOT": '\U00000022', 2079 | "REG": '\U000000AE', 2080 | "THORN": '\U000000DE', 2081 | "Uacute": '\U000000DA', 2082 | "Ucirc": '\U000000DB', 2083 | "Ugrave": '\U000000D9', 2084 | "Uuml": '\U000000DC', 2085 | "Yacute": '\U000000DD', 2086 | "aacute": '\U000000E1', 2087 | "acirc": '\U000000E2', 2088 | "acute": '\U000000B4', 2089 | "aelig": '\U000000E6', 2090 | "agrave": '\U000000E0', 2091 | "amp": '\U00000026', 2092 | "aring": '\U000000E5', 2093 | "atilde": '\U000000E3', 2094 | "auml": '\U000000E4', 2095 | "brvbar": '\U000000A6', 2096 | "ccedil": '\U000000E7', 2097 | "cedil": '\U000000B8', 2098 | "cent": '\U000000A2', 2099 | "copy": '\U000000A9', 2100 | "curren": '\U000000A4', 2101 | "deg": '\U000000B0', 2102 | "divide": '\U000000F7', 2103 | "eacute": '\U000000E9', 2104 | "ecirc": '\U000000EA', 2105 | "egrave": '\U000000E8', 2106 | "eth": '\U000000F0', 2107 | "euml": '\U000000EB', 2108 | "frac12": '\U000000BD', 2109 | "frac14": '\U000000BC', 2110 | "frac34": '\U000000BE', 2111 | "gt": '\U0000003E', 2112 | "iacute": '\U000000ED', 2113 | "icirc": '\U000000EE', 2114 | "iexcl": '\U000000A1', 2115 | "igrave": '\U000000EC', 2116 | "iquest": '\U000000BF', 2117 | "iuml": '\U000000EF', 2118 | "laquo": '\U000000AB', 2119 | "lt": '\U0000003C', 2120 | "macr": '\U000000AF', 2121 | "micro": '\U000000B5', 2122 | "middot": '\U000000B7', 2123 | "nbsp": '\U000000A0', 2124 | "not": '\U000000AC', 2125 | "ntilde": '\U000000F1', 2126 | "oacute": '\U000000F3', 2127 | "ocirc": '\U000000F4', 2128 | "ograve": '\U000000F2', 2129 | "ordf": '\U000000AA', 2130 | "ordm": '\U000000BA', 2131 | "oslash": '\U000000F8', 2132 | "otilde": '\U000000F5', 2133 | "ouml": '\U000000F6', 2134 | "para": '\U000000B6', 2135 | "plusmn": '\U000000B1', 2136 | "pound": '\U000000A3', 2137 | "quot": 
'\U00000022', 2138 | "raquo": '\U000000BB', 2139 | "reg": '\U000000AE', 2140 | "sect": '\U000000A7', 2141 | "shy": '\U000000AD', 2142 | "sup1": '\U000000B9', 2143 | "sup2": '\U000000B2', 2144 | "sup3": '\U000000B3', 2145 | "szlig": '\U000000DF', 2146 | "thorn": '\U000000FE', 2147 | "times": '\U000000D7', 2148 | "uacute": '\U000000FA', 2149 | "ucirc": '\U000000FB', 2150 | "ugrave": '\U000000F9', 2151 | "uml": '\U000000A8', 2152 | "uuml": '\U000000FC', 2153 | "yacute": '\U000000FD', 2154 | "yen": '\U000000A5', 2155 | "yuml": '\U000000FF', 2156 | } 2157 | 2158 | // HTML entities that are two unicode codepoints. 2159 | var entity2 = map[string][2]rune{ 2160 | "nLt;": {'\u226A', '\u20D2'}, 2161 | "nGt;": {'\u226B', '\u20D2'}, 2162 | "NotEqualTilde;": {'\u2242', '\u0338'}, 2163 | "NotGreaterFullEqual;": {'\u2267', '\u0338'}, 2164 | "NotGreaterGreater;": {'\u226B', '\u0338'}, 2165 | "NotGreaterSlantEqual;": {'\u2A7E', '\u0338'}, 2166 | "NotHumpDownHump;": {'\u224E', '\u0338'}, 2167 | "NotHumpEqual;": {'\u224F', '\u0338'}, 2168 | "NotLeftTriangleBar;": {'\u29CF', '\u0338'}, 2169 | "NotLessLess;": {'\u226A', '\u0338'}, 2170 | "NotLessSlantEqual;": {'\u2A7D', '\u0338'}, 2171 | "NotNestedGreaterGreater;": {'\u2AA2', '\u0338'}, 2172 | "NotNestedLessLess;": {'\u2AA1', '\u0338'}, 2173 | "NotPrecedesEqual;": {'\u2AAF', '\u0338'}, 2174 | "NotRightTriangleBar;": {'\u29D0', '\u0338'}, 2175 | "NotSquareSubset;": {'\u228F', '\u0338'}, 2176 | "NotSquareSuperset;": {'\u2290', '\u0338'}, 2177 | "NotSubset;": {'\u2282', '\u20D2'}, 2178 | "NotSucceedsEqual;": {'\u2AB0', '\u0338'}, 2179 | "NotSucceedsTilde;": {'\u227F', '\u0338'}, 2180 | "NotSuperset;": {'\u2283', '\u20D2'}, 2181 | "ThickSpace;": {'\u205F', '\u200A'}, 2182 | "acE;": {'\u223E', '\u0333'}, 2183 | "bne;": {'\u003D', '\u20E5'}, 2184 | "bnequiv;": {'\u2261', '\u20E5'}, 2185 | "caps;": {'\u2229', '\uFE00'}, 2186 | "cups;": {'\u222A', '\uFE00'}, 2187 | "fjlig;": {'\u0066', '\u006A'}, 2188 | "gesl;": {'\u22DB', '\uFE00'}, 2189 | 
"gvertneqq;": {'\u2269', '\uFE00'}, 2190 | "gvnE;": {'\u2269', '\uFE00'}, 2191 | "lates;": {'\u2AAD', '\uFE00'}, 2192 | "lesg;": {'\u22DA', '\uFE00'}, 2193 | "lvertneqq;": {'\u2268', '\uFE00'}, 2194 | "lvnE;": {'\u2268', '\uFE00'}, 2195 | "nGg;": {'\u22D9', '\u0338'}, 2196 | "nGtv;": {'\u226B', '\u0338'}, 2197 | "nLl;": {'\u22D8', '\u0338'}, 2198 | "nLtv;": {'\u226A', '\u0338'}, 2199 | "nang;": {'\u2220', '\u20D2'}, 2200 | "napE;": {'\u2A70', '\u0338'}, 2201 | "napid;": {'\u224B', '\u0338'}, 2202 | "nbump;": {'\u224E', '\u0338'}, 2203 | "nbumpe;": {'\u224F', '\u0338'}, 2204 | "ncongdot;": {'\u2A6D', '\u0338'}, 2205 | "nedot;": {'\u2250', '\u0338'}, 2206 | "nesim;": {'\u2242', '\u0338'}, 2207 | "ngE;": {'\u2267', '\u0338'}, 2208 | "ngeqq;": {'\u2267', '\u0338'}, 2209 | "ngeqslant;": {'\u2A7E', '\u0338'}, 2210 | "nges;": {'\u2A7E', '\u0338'}, 2211 | "nlE;": {'\u2266', '\u0338'}, 2212 | "nleqq;": {'\u2266', '\u0338'}, 2213 | "nleqslant;": {'\u2A7D', '\u0338'}, 2214 | "nles;": {'\u2A7D', '\u0338'}, 2215 | "notinE;": {'\u22F9', '\u0338'}, 2216 | "notindot;": {'\u22F5', '\u0338'}, 2217 | "nparsl;": {'\u2AFD', '\u20E5'}, 2218 | "npart;": {'\u2202', '\u0338'}, 2219 | "npre;": {'\u2AAF', '\u0338'}, 2220 | "npreceq;": {'\u2AAF', '\u0338'}, 2221 | "nrarrc;": {'\u2933', '\u0338'}, 2222 | "nrarrw;": {'\u219D', '\u0338'}, 2223 | "nsce;": {'\u2AB0', '\u0338'}, 2224 | "nsubE;": {'\u2AC5', '\u0338'}, 2225 | "nsubset;": {'\u2282', '\u20D2'}, 2226 | "nsubseteqq;": {'\u2AC5', '\u0338'}, 2227 | "nsucceq;": {'\u2AB0', '\u0338'}, 2228 | "nsupE;": {'\u2AC6', '\u0338'}, 2229 | "nsupset;": {'\u2283', '\u20D2'}, 2230 | "nsupseteqq;": {'\u2AC6', '\u0338'}, 2231 | "nvap;": {'\u224D', '\u20D2'}, 2232 | "nvge;": {'\u2265', '\u20D2'}, 2233 | "nvgt;": {'\u003E', '\u20D2'}, 2234 | "nvle;": {'\u2264', '\u20D2'}, 2235 | "nvlt;": {'\u003C', '\u20D2'}, 2236 | "nvltrie;": {'\u22B4', '\u20D2'}, 2237 | "nvrtrie;": {'\u22B5', '\u20D2'}, 2238 | "nvsim;": {'\u223C', '\u20D2'}, 2239 | "race;": {'\u223D', 
'\u0331'}, 2240 | "smtes;": {'\u2AAC', '\uFE00'}, 2241 | "sqcaps;": {'\u2293', '\uFE00'}, 2242 | "sqcups;": {'\u2294', '\uFE00'}, 2243 | "varsubsetneq;": {'\u228A', '\uFE00'}, 2244 | "varsubsetneqq;": {'\u2ACB', '\uFE00'}, 2245 | "varsupsetneq;": {'\u228B', '\uFE00'}, 2246 | "varsupsetneqq;": {'\u2ACC', '\uFE00'}, 2247 | "vnsub;": {'\u2282', '\u20D2'}, 2248 | "vnsup;": {'\u2283', '\u20D2'}, 2249 | "vsubnE;": {'\u2ACB', '\uFE00'}, 2250 | "vsubne;": {'\u228A', '\uFE00'}, 2251 | "vsupnE;": {'\u2ACC', '\uFE00'}, 2252 | "vsupne;": {'\u228B', '\uFE00'}, 2253 | } 2254 | -------------------------------------------------------------------------------- /euc-jp.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for the EUC-JP encoding 4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "EUC-JP", 12 | Aliases: []string{"extended_unix_code_packed_format_for_japanese", "cseucpkdfmtjapanese"}, 13 | NewDecoder: func() Decoder { 14 | eucJPOnce.Do(makeEUCJPTable) 15 | return eucJPTable.Decoder() 16 | }, 17 | NewEncoder: func() Encoder { 18 | eucJPOnce.Do(makeEUCJPTable) 19 | return eucJPTable.Encoder() 20 | }, 21 | }) 22 | } 23 | 24 | var eucJPOnce sync.Once 25 | var eucJPTable MBCSTable 26 | 27 | func makeEUCJPTable() { 28 | var b [3]byte 29 | 30 | b[0] = 0x8f 31 | for jis0212, unicode := range jis0212ToUnicode { 32 | if unicode == 0 { 33 | continue 34 | } 35 | 36 | b[1] = byte(jis0212>>8) | 128 37 | b[2] = byte(jis0212) | 128 38 | eucJPTable.AddCharacter(rune(unicode), string(b[:3])) 39 | } 40 | 41 | for jis0208, unicode := range jis0208ToUnicode { 42 | if unicode == 0 { 43 | continue 44 | } 45 | 46 | b[0] = byte(jis0208>>8) | 128 47 | b[1] = byte(jis0208) | 128 48 | eucJPTable.AddCharacter(rune(unicode), string(b[:2])) 49 | } 50 | 51 | b[0] = 0x8e 52 | for i := 128; i < 256; i++ { 53 | unicode := jis0201ToUnicode[i] 54 | if unicode == 0 { 55 | continue 56 | } 
57 | 58 | b[1] = byte(i) 59 | eucJPTable.AddCharacter(rune(unicode), string(b[:2])) 60 | } 61 | 62 | for i := '\x00'; i < 128; i++ { 63 | var unicode rune 64 | if i < 32 || i == 127 { 65 | unicode = i 66 | } else { 67 | unicode = rune(jis0201ToUnicode[i]) 68 | if unicode == 0 { 69 | continue 70 | } 71 | } 72 | 73 | eucJPTable.AddCharacter(unicode, string(byte(i))) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /fallback.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // FallbackDecoder combines a series of Decoders into one. 4 | // If the first Decoder returns a status of INVALID_CHAR, the others are tried as well. 5 | // 6 | // Note: if the text to be decoded ends with a sequence of bytes that is not a valid character in the first charset, 7 | // but it could be the beginning of a valid character, the FallbackDecoder will give a status of NO_ROOM instead of 8 | // falling back to the other Decoders. 9 | func FallbackDecoder(decoders ...Decoder) Decoder { 10 | return func(p []byte) (c rune, size int, status Status) { 11 | for _, d := range decoders { 12 | c, size, status = d(p) 13 | if status != INVALID_CHAR { 14 | return 15 | } 16 | } 17 | return 0, 1, INVALID_CHAR 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /gb18030.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | // Converters for GB18030 encoding. 
8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "GB18030", 12 | NewDecoder: func() Decoder { 13 | gb18030Once.Do(buildGB18030Tables) 14 | return decodeGB18030Rune 15 | }, 16 | NewEncoder: func() Encoder { 17 | gb18030Once.Do(buildGB18030Tables) 18 | return encodeGB18030Rune 19 | }, 20 | }) 21 | } 22 | 23 | func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { 24 | if len(p) == 0 { 25 | status = NO_ROOM 26 | return 27 | } 28 | 29 | b := p[0] 30 | if b < 128 { 31 | return rune(b), 1, SUCCESS 32 | } 33 | 34 | if len(p) < 2 { 35 | status = NO_ROOM 36 | return 37 | } 38 | 39 | if p[0] < 0x81 || p[0] > 0xfe { 40 | return 0xfffd, 1, INVALID_CHAR 41 | } 42 | 43 | if p[1] >= 0x40 { 44 | // 2-byte character 45 | c := uint16(p[0])<<8 + uint16(p[1]) 46 | r = rune(gbkToUnicode[c]) 47 | if r == 0 { 48 | r = gbkToUnicodeExtra[c] 49 | } 50 | 51 | if r != 0 { 52 | return r, 2, SUCCESS 53 | } 54 | } else if p[1] >= 0x30 { 55 | // 4-byte character 56 | if len(p) < 4 { 57 | return 0, 0, NO_ROOM 58 | } 59 | if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { 60 | return 0xfffd, 1, INVALID_CHAR 61 | } 62 | 63 | code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) 64 | lin := gb18030Linear(code) 65 | 66 | if lin <= maxGB18030Linear { 67 | r = rune(gb18030LinearToUnicode[lin]) 68 | if r != 0 { 69 | return r, 4, SUCCESS 70 | } 71 | } 72 | 73 | for _, rng := range gb18030Ranges { 74 | if lin >= rng.firstGB && lin <= rng.lastGB { 75 | return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS 76 | } 77 | } 78 | } 79 | 80 | return 0xfffd, 1, INVALID_CHAR 81 | } 82 | 83 | func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { 84 | if len(p) == 0 { 85 | status = NO_ROOM 86 | return 87 | } 88 | 89 | if r < 128 { 90 | p[0] = byte(r) 91 | return 1, SUCCESS 92 | } 93 | 94 | if len(p) < 2 { 95 | status = NO_ROOM 96 | return 97 | } 98 | 99 | var c uint16 100 | if r < 0x10000 { 101 | c = unicodeToGBK[r] 102 | } else { 
103 | c = unicodeToGBKExtra[r] 104 | } 105 | 106 | if c != 0 { 107 | p[0] = byte(c >> 8) 108 | p[1] = byte(c) 109 | return 2, SUCCESS 110 | } 111 | 112 | if len(p) < 4 { 113 | return 0, NO_ROOM 114 | } 115 | 116 | if r < 0x10000 { 117 | f := unicodeToGB18030[r] 118 | if f != 0 { 119 | p[0] = byte(f >> 24) 120 | p[1] = byte(f >> 16) 121 | p[2] = byte(f >> 8) 122 | p[3] = byte(f) 123 | return 4, SUCCESS 124 | } 125 | } 126 | 127 | for _, rng := range gb18030Ranges { 128 | if r >= rng.firstRune && r <= rng.lastRune { 129 | lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) 130 | p[0] = byte(lin/(10*126*10)) + 0x81 131 | p[1] = byte(lin/(126*10)%10) + 0x30 132 | p[2] = byte(lin/10%126) + 0x81 133 | p[3] = byte(lin%10) + 0x30 134 | return 4, SUCCESS 135 | } 136 | } 137 | 138 | p[0] = 0x1a 139 | return 1, INVALID_CHAR 140 | } 141 | 142 | var gb18030Once sync.Once 143 | 144 | // Mapping from gb18039Linear values to Unicode. 145 | var gb18030LinearToUnicode []uint16 146 | 147 | var unicodeToGB18030 []uint32 148 | 149 | func buildGB18030Tables() { 150 | gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) 151 | unicodeToGB18030 = make([]uint32, 65536) 152 | for _, data := range gb18030Data { 153 | gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode 154 | unicodeToGB18030[data.unicode] = data.gb18030 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /gbk.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for GBK encoding. 
4 | 5 | func init() { 6 | RegisterCharset(&Charset{ 7 | Name: "GBK", 8 | NewDecoder: func() Decoder { 9 | return decodeGBKRune 10 | }, 11 | NewEncoder: func() Encoder { 12 | return encodeGBKRune 13 | }, 14 | }) 15 | } 16 | 17 | func decodeGBKRune(p []byte) (r rune, size int, status Status) { 18 | if len(p) == 0 { 19 | status = NO_ROOM 20 | return 21 | } 22 | 23 | b := p[0] 24 | if b < 128 { 25 | return rune(b), 1, SUCCESS 26 | } 27 | 28 | if len(p) < 2 { 29 | status = NO_ROOM 30 | return 31 | } 32 | 33 | c := uint16(p[0])<<8 + uint16(p[1]) 34 | r = rune(gbkToUnicode[c]) 35 | if r == 0 { 36 | r = gbkToUnicodeExtra[c] 37 | } 38 | 39 | if r != 0 { 40 | return r, 2, SUCCESS 41 | } 42 | 43 | return 0xfffd, 1, INVALID_CHAR 44 | } 45 | 46 | func encodeGBKRune(p []byte, r rune) (size int, status Status) { 47 | if len(p) == 0 { 48 | status = NO_ROOM 49 | return 50 | } 51 | 52 | if r < 128 { 53 | p[0] = byte(r) 54 | return 1, SUCCESS 55 | } 56 | 57 | if len(p) < 2 { 58 | status = NO_ROOM 59 | return 60 | } 61 | 62 | var c uint16 63 | if r < 0x10000 { 64 | c = unicodeToGBK[r] 65 | } else { 66 | c = unicodeToGBKExtra[r] 67 | } 68 | 69 | if c != 0 { 70 | p[0] = byte(c >> 8) 71 | p[1] = byte(c) 72 | return 2, SUCCESS 73 | } 74 | 75 | p[0] = 0x1a 76 | return 1, INVALID_CHAR 77 | } 78 | -------------------------------------------------------------------------------- /jis0201-data.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | var jis0201ToUnicode = [256]uint16{ 4 | 0x20: 0x0020, // SPACE 5 | 0x21: 0x0021, // EXCLAMATION MARK 6 | 0x22: 0x0022, // QUOTATION MARK 7 | 0x23: 0x0023, // NUMBER SIGN 8 | 0x24: 0x0024, // DOLLAR SIGN 9 | 0x25: 0x0025, // PERCENT SIGN 10 | 0x26: 0x0026, // AMPERSAND 11 | 0x27: 0x0027, // APOSTROPHE 12 | 0x28: 0x0028, // LEFT PARENTHESIS 13 | 0x29: 0x0029, // RIGHT PARENTHESIS 14 | 0x2A: 0x002A, // ASTERISK 15 | 0x2B: 0x002B, // PLUS SIGN 16 | 0x2C: 0x002C, // COMMA 17 | 0x2D: 0x002D, // 
HYPHEN-MINUS 18 | 0x2E: 0x002E, // FULL STOP 19 | 0x2F: 0x002F, // SOLIDUS 20 | 0x30: 0x0030, // DIGIT ZERO 21 | 0x31: 0x0031, // DIGIT ONE 22 | 0x32: 0x0032, // DIGIT TWO 23 | 0x33: 0x0033, // DIGIT THREE 24 | 0x34: 0x0034, // DIGIT FOUR 25 | 0x35: 0x0035, // DIGIT FIVE 26 | 0x36: 0x0036, // DIGIT SIX 27 | 0x37: 0x0037, // DIGIT SEVEN 28 | 0x38: 0x0038, // DIGIT EIGHT 29 | 0x39: 0x0039, // DIGIT NINE 30 | 0x3A: 0x003A, // COLON 31 | 0x3B: 0x003B, // SEMICOLON 32 | 0x3C: 0x003C, // LESS-THAN SIGN 33 | 0x3D: 0x003D, // EQUALS SIGN 34 | 0x3E: 0x003E, // GREATER-THAN SIGN 35 | 0x3F: 0x003F, // QUESTION MARK 36 | 0x40: 0x0040, // COMMERCIAL AT 37 | 0x41: 0x0041, // LATIN CAPITAL LETTER A 38 | 0x42: 0x0042, // LATIN CAPITAL LETTER B 39 | 0x43: 0x0043, // LATIN CAPITAL LETTER C 40 | 0x44: 0x0044, // LATIN CAPITAL LETTER D 41 | 0x45: 0x0045, // LATIN CAPITAL LETTER E 42 | 0x46: 0x0046, // LATIN CAPITAL LETTER F 43 | 0x47: 0x0047, // LATIN CAPITAL LETTER G 44 | 0x48: 0x0048, // LATIN CAPITAL LETTER H 45 | 0x49: 0x0049, // LATIN CAPITAL LETTER I 46 | 0x4A: 0x004A, // LATIN CAPITAL LETTER J 47 | 0x4B: 0x004B, // LATIN CAPITAL LETTER K 48 | 0x4C: 0x004C, // LATIN CAPITAL LETTER L 49 | 0x4D: 0x004D, // LATIN CAPITAL LETTER M 50 | 0x4E: 0x004E, // LATIN CAPITAL LETTER N 51 | 0x4F: 0x004F, // LATIN CAPITAL LETTER O 52 | 0x50: 0x0050, // LATIN CAPITAL LETTER P 53 | 0x51: 0x0051, // LATIN CAPITAL LETTER Q 54 | 0x52: 0x0052, // LATIN CAPITAL LETTER R 55 | 0x53: 0x0053, // LATIN CAPITAL LETTER S 56 | 0x54: 0x0054, // LATIN CAPITAL LETTER T 57 | 0x55: 0x0055, // LATIN CAPITAL LETTER U 58 | 0x56: 0x0056, // LATIN CAPITAL LETTER V 59 | 0x57: 0x0057, // LATIN CAPITAL LETTER W 60 | 0x58: 0x0058, // LATIN CAPITAL LETTER X 61 | 0x59: 0x0059, // LATIN CAPITAL LETTER Y 62 | 0x5A: 0x005A, // LATIN CAPITAL LETTER Z 63 | 0x5B: 0x005B, // LEFT SQUARE BRACKET 64 | 0x5C: 0x00A5, // YEN SIGN 65 | 0x5D: 0x005D, // RIGHT SQUARE BRACKET 66 | 0x5E: 0x005E, // CIRCUMFLEX ACCENT 67 | 0x5F: 0x005F, // LOW 
LINE 68 | 0x60: 0x0060, // GRAVE ACCENT 69 | 0x61: 0x0061, // LATIN SMALL LETTER A 70 | 0x62: 0x0062, // LATIN SMALL LETTER B 71 | 0x63: 0x0063, // LATIN SMALL LETTER C 72 | 0x64: 0x0064, // LATIN SMALL LETTER D 73 | 0x65: 0x0065, // LATIN SMALL LETTER E 74 | 0x66: 0x0066, // LATIN SMALL LETTER F 75 | 0x67: 0x0067, // LATIN SMALL LETTER G 76 | 0x68: 0x0068, // LATIN SMALL LETTER H 77 | 0x69: 0x0069, // LATIN SMALL LETTER I 78 | 0x6A: 0x006A, // LATIN SMALL LETTER J 79 | 0x6B: 0x006B, // LATIN SMALL LETTER K 80 | 0x6C: 0x006C, // LATIN SMALL LETTER L 81 | 0x6D: 0x006D, // LATIN SMALL LETTER M 82 | 0x6E: 0x006E, // LATIN SMALL LETTER N 83 | 0x6F: 0x006F, // LATIN SMALL LETTER O 84 | 0x70: 0x0070, // LATIN SMALL LETTER P 85 | 0x71: 0x0071, // LATIN SMALL LETTER Q 86 | 0x72: 0x0072, // LATIN SMALL LETTER R 87 | 0x73: 0x0073, // LATIN SMALL LETTER S 88 | 0x74: 0x0074, // LATIN SMALL LETTER T 89 | 0x75: 0x0075, // LATIN SMALL LETTER U 90 | 0x76: 0x0076, // LATIN SMALL LETTER V 91 | 0x77: 0x0077, // LATIN SMALL LETTER W 92 | 0x78: 0x0078, // LATIN SMALL LETTER X 93 | 0x79: 0x0079, // LATIN SMALL LETTER Y 94 | 0x7A: 0x007A, // LATIN SMALL LETTER Z 95 | 0x7B: 0x007B, // LEFT CURLY BRACKET 96 | 0x7C: 0x007C, // VERTICAL LINE 97 | 0x7D: 0x007D, // RIGHT CURLY BRACKET 98 | 0x7E: 0x203E, // OVERLINE 99 | 0xA1: 0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP 100 | 0xA2: 0xFF62, // HALFWIDTH LEFT CORNER BRACKET 101 | 0xA3: 0xFF63, // HALFWIDTH RIGHT CORNER BRACKET 102 | 0xA4: 0xFF64, // HALFWIDTH IDEOGRAPHIC COMMA 103 | 0xA5: 0xFF65, // HALFWIDTH KATAKANA MIDDLE DOT 104 | 0xA6: 0xFF66, // HALFWIDTH KATAKANA LETTER WO 105 | 0xA7: 0xFF67, // HALFWIDTH KATAKANA LETTER SMALL A 106 | 0xA8: 0xFF68, // HALFWIDTH KATAKANA LETTER SMALL I 107 | 0xA9: 0xFF69, // HALFWIDTH KATAKANA LETTER SMALL U 108 | 0xAA: 0xFF6A, // HALFWIDTH KATAKANA LETTER SMALL E 109 | 0xAB: 0xFF6B, // HALFWIDTH KATAKANA LETTER SMALL O 110 | 0xAC: 0xFF6C, // HALFWIDTH KATAKANA LETTER SMALL YA 111 | 0xAD: 0xFF6D, // 
HALFWIDTH KATAKANA LETTER SMALL YU 112 | 0xAE: 0xFF6E, // HALFWIDTH KATAKANA LETTER SMALL YO 113 | 0xAF: 0xFF6F, // HALFWIDTH KATAKANA LETTER SMALL TU 114 | 0xB0: 0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 115 | 0xB1: 0xFF71, // HALFWIDTH KATAKANA LETTER A 116 | 0xB2: 0xFF72, // HALFWIDTH KATAKANA LETTER I 117 | 0xB3: 0xFF73, // HALFWIDTH KATAKANA LETTER U 118 | 0xB4: 0xFF74, // HALFWIDTH KATAKANA LETTER E 119 | 0xB5: 0xFF75, // HALFWIDTH KATAKANA LETTER O 120 | 0xB6: 0xFF76, // HALFWIDTH KATAKANA LETTER KA 121 | 0xB7: 0xFF77, // HALFWIDTH KATAKANA LETTER KI 122 | 0xB8: 0xFF78, // HALFWIDTH KATAKANA LETTER KU 123 | 0xB9: 0xFF79, // HALFWIDTH KATAKANA LETTER KE 124 | 0xBA: 0xFF7A, // HALFWIDTH KATAKANA LETTER KO 125 | 0xBB: 0xFF7B, // HALFWIDTH KATAKANA LETTER SA 126 | 0xBC: 0xFF7C, // HALFWIDTH KATAKANA LETTER SI 127 | 0xBD: 0xFF7D, // HALFWIDTH KATAKANA LETTER SU 128 | 0xBE: 0xFF7E, // HALFWIDTH KATAKANA LETTER SE 129 | 0xBF: 0xFF7F, // HALFWIDTH KATAKANA LETTER SO 130 | 0xC0: 0xFF80, // HALFWIDTH KATAKANA LETTER TA 131 | 0xC1: 0xFF81, // HALFWIDTH KATAKANA LETTER TI 132 | 0xC2: 0xFF82, // HALFWIDTH KATAKANA LETTER TU 133 | 0xC3: 0xFF83, // HALFWIDTH KATAKANA LETTER TE 134 | 0xC4: 0xFF84, // HALFWIDTH KATAKANA LETTER TO 135 | 0xC5: 0xFF85, // HALFWIDTH KATAKANA LETTER NA 136 | 0xC6: 0xFF86, // HALFWIDTH KATAKANA LETTER NI 137 | 0xC7: 0xFF87, // HALFWIDTH KATAKANA LETTER NU 138 | 0xC8: 0xFF88, // HALFWIDTH KATAKANA LETTER NE 139 | 0xC9: 0xFF89, // HALFWIDTH KATAKANA LETTER NO 140 | 0xCA: 0xFF8A, // HALFWIDTH KATAKANA LETTER HA 141 | 0xCB: 0xFF8B, // HALFWIDTH KATAKANA LETTER HI 142 | 0xCC: 0xFF8C, // HALFWIDTH KATAKANA LETTER HU 143 | 0xCD: 0xFF8D, // HALFWIDTH KATAKANA LETTER HE 144 | 0xCE: 0xFF8E, // HALFWIDTH KATAKANA LETTER HO 145 | 0xCF: 0xFF8F, // HALFWIDTH KATAKANA LETTER MA 146 | 0xD0: 0xFF90, // HALFWIDTH KATAKANA LETTER MI 147 | 0xD1: 0xFF91, // HALFWIDTH KATAKANA LETTER MU 148 | 0xD2: 0xFF92, // HALFWIDTH KATAKANA LETTER ME 149 | 0xD3: 
0xFF93, // HALFWIDTH KATAKANA LETTER MO 150 | 0xD4: 0xFF94, // HALFWIDTH KATAKANA LETTER YA 151 | 0xD5: 0xFF95, // HALFWIDTH KATAKANA LETTER YU 152 | 0xD6: 0xFF96, // HALFWIDTH KATAKANA LETTER YO 153 | 0xD7: 0xFF97, // HALFWIDTH KATAKANA LETTER RA 154 | 0xD8: 0xFF98, // HALFWIDTH KATAKANA LETTER RI 155 | 0xD9: 0xFF99, // HALFWIDTH KATAKANA LETTER RU 156 | 0xDA: 0xFF9A, // HALFWIDTH KATAKANA LETTER RE 157 | 0xDB: 0xFF9B, // HALFWIDTH KATAKANA LETTER RO 158 | 0xDC: 0xFF9C, // HALFWIDTH KATAKANA LETTER WA 159 | 0xDD: 0xFF9D, // HALFWIDTH KATAKANA LETTER N 160 | 0xDE: 0xFF9E, // HALFWIDTH KATAKANA VOICED SOUND MARK 161 | 0xDF: 0xFF9F, // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 162 | } 163 | -------------------------------------------------------------------------------- /mahonia_test.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "testing" 7 | ) 8 | 9 | var nameTests = map[string]string{ 10 | "utf8": "utf8", 11 | "ISO 8859-1": "iso88591", 12 | "Big5": "big5", 13 | "": "", 14 | } 15 | 16 | func TestSimplifyName(t *testing.T) { 17 | for name, simple := range nameTests { 18 | if simple != simplifyName(name) { 19 | t.Errorf("%s came out as %s instead of as %s", name, simplifyName(name), simple) 20 | } 21 | } 22 | } 23 | 24 | var testData = []struct { 25 | utf8, other, otherEncoding string 26 | }{ 27 | {"Résumé", "Résumé", "utf8"}, 28 | {"Résumé", "R\xe9sum\xe9", "latin-1"}, 29 | {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"}, 30 | {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"}, 31 | {"これは漢字です。", "\xfe\xff0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16"}, 32 | {"𝄢𝄞𝄪𝄫", "\xfe\xff\xd8\x34\xdd\x22\xd8\x34\xdd\x1e\xd8\x34\xdd\x2a\xd8\x34\xdd\x2b", "UTF-16"}, 33 | {"Hello, world", "Hello, world", "ASCII"}, 34 | {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"}, 35 | {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", 
"ISO-8859-10"}, 36 | {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"}, 37 | {"latviešu", "latvie\xf0u", "ISO-8859-13"}, 38 | {"Seònaid", "Se\xf2naid", "ISO-8859-14"}, 39 | {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"}, 40 | {"românește", "rom\xe2ne\xbate", "ISO-8859-16"}, 41 | {"nutraĵo", "nutra\xbco", "ISO-8859-3"}, 42 | {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"}, 43 | {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"}, 44 | {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"}, 45 | {"Kağan", "Ka\xf0an", "ISO-8859-9"}, 46 | {"Résumé", "R\x8esum\x8e", "macintosh"}, 47 | {"Gdańsk", "Gda\xf1sk", "windows-1250"}, 48 | {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"}, 49 | {"Résumé", "R\xe9sum\xe9", "windows-1252"}, 50 | {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"}, 51 | {"Kağan", "Ka\xf0an", "windows-1254"}, 52 | {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"}, 53 | {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"}, 54 | {"latviešu", "latvie\xf0u", "windows-1257"}, 55 | {"Việt", "Vi\xea\xf2t", "windows-1258"}, 56 | {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"}, 57 | {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"}, 58 | {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"}, 59 | {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"}, 60 | {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"}, 61 | {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"}, 62 | {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"}, 63 | {"㧯", "\x82\x31\x89\x38", "gb18030"}, 64 | {"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"}, 65 | {"これは漢字です。", 
"\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"}, 66 | } 67 | 68 | func TestDecode(t *testing.T) { 69 | for _, data := range testData { 70 | d := NewDecoder(data.otherEncoding) 71 | if d == nil { 72 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 73 | continue 74 | } 75 | 76 | str := d.ConvertString(data.other) 77 | 78 | if str != data.utf8 { 79 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 80 | } 81 | } 82 | } 83 | 84 | func TestDecodeTranslate(t *testing.T) { 85 | for _, data := range testData { 86 | d := NewDecoder(data.otherEncoding) 87 | if d == nil { 88 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 89 | continue 90 | } 91 | 92 | _, cdata, _ := d.Translate([]byte(data.other), true) 93 | str := string(cdata) 94 | 95 | if str != data.utf8 { 96 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 97 | } 98 | } 99 | } 100 | 101 | func TestEncode(t *testing.T) { 102 | for _, data := range testData { 103 | e := NewEncoder(data.otherEncoding) 104 | if e == nil { 105 | t.Errorf("Could not create encoder for %s", data.otherEncoding) 106 | continue 107 | } 108 | 109 | str := e.ConvertString(data.utf8) 110 | 111 | if str != data.other { 112 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other) 113 | } 114 | } 115 | } 116 | 117 | func TestReader(t *testing.T) { 118 | for _, data := range testData { 119 | d := NewDecoder(data.otherEncoding) 120 | if d == nil { 121 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 122 | continue 123 | } 124 | 125 | b := bytes.NewBufferString(data.other) 126 | r := d.NewReader(b) 127 | result, _ := ioutil.ReadAll(r) 128 | str := string(result) 129 | 130 | if str != data.utf8 { 131 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 132 | } 133 | } 134 | } 135 | 136 | func TestWriter(t *testing.T) { 137 | for _, data := range testData { 138 | e := NewEncoder(data.otherEncoding) 139 | if e == nil { 140 
| t.Errorf("Could not create encoder for %s", data.otherEncoding) 141 | continue 142 | } 143 | 144 | b := new(bytes.Buffer) 145 | w := e.NewWriter(b) 146 | w.Write([]byte(data.utf8)) 147 | str := b.String() 148 | 149 | if str != data.other { 150 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other) 151 | } 152 | } 153 | } 154 | 155 | func TestFallback(t *testing.T) { 156 | mixed := "résum\xe9 " // The space is needed because of the issue mentioned in the Note: in fallback.go 157 | pure := "résumé " 158 | d := FallbackDecoder(NewDecoder("utf8"), NewDecoder("ISO-8859-1")) 159 | result := d.ConvertString(mixed) 160 | if result != pure { 161 | t.Errorf("Unexpected value: %#v (expected %#v)", result, pure) 162 | } 163 | } 164 | 165 | func TestEntities(t *testing.T) { 166 | escaped := "¬it; I'm ∉ I tell you‚ ≪⃒ " 167 | plain := "¬it; I'm ∉ I tell you\u201a \u226A\u20D2 " 168 | d := FallbackDecoder(EntityDecoder(), NewDecoder("ISO-8859-1")) 169 | result := d.ConvertString(escaped) 170 | if result != plain { 171 | t.Errorf("Unexpected value: %#v (expected %#v)", result, plain) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /mahoniconv/mahoniconv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/axgle/mahonia" 5 | "flag" 6 | "io" 7 | "log" 8 | "os" 9 | ) 10 | 11 | // An iconv workalike using mahonia. 
12 | 13 | var from = flag.String("f", "utf-8", "source character set") 14 | var to = flag.String("t", "utf-8", "destination character set") 15 | 16 | func main() { 17 | flag.Parse() 18 | 19 | var r io.Reader = os.Stdin 20 | var w io.Writer = os.Stdout 21 | 22 | if *from != "utf-8" { 23 | decode := mahonia.NewDecoder(*from) 24 | if decode == nil { 25 | log.Fatalf("Could not create decoder for %s", *from) 26 | } 27 | r = decode.NewReader(r) 28 | } 29 | 30 | if *to != "utf-8" { 31 | encode := mahonia.NewEncoder(*to) 32 | if encode == nil { 33 | log.Fatalf("Could not create decoder for %s", *to) 34 | } 35 | w = encode.NewWriter(w) 36 | } 37 | 38 | io.Copy(w, r) 39 | } 40 | -------------------------------------------------------------------------------- /mbcs.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Generic converters for multibyte character sets. 4 | 5 | // An mbcsTrie contains the data to convert from the character set to Unicode. 6 | // If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune 7 | // children either is nil or has 256 elements. 8 | type mbcsTrie struct { 9 | // For leaf nodes, the Unicode character that is represented. 10 | char rune 11 | 12 | // For non-leaf nodes, the trie to decode the remainder of the character. 13 | children []mbcsTrie 14 | } 15 | 16 | // A MBCSTable holds the data to convert to and from Unicode. 17 | type MBCSTable struct { 18 | toUnicode mbcsTrie 19 | fromUnicode map[rune]string 20 | } 21 | 22 | // AddCharacter adds a character to the table. rune is its Unicode code point, 23 | // and bytes contains the bytes used to encode it in the character set. 
24 | func (table *MBCSTable) AddCharacter(c rune, bytes string) { 25 | if table.fromUnicode == nil { 26 | table.fromUnicode = make(map[rune]string) 27 | } 28 | 29 | table.fromUnicode[c] = bytes 30 | 31 | trie := &table.toUnicode 32 | for i := 0; i < len(bytes); i++ { 33 | if trie.children == nil { 34 | trie.children = make([]mbcsTrie, 256) 35 | } 36 | 37 | b := bytes[i] 38 | trie = &trie.children[b] 39 | } 40 | 41 | trie.char = c 42 | } 43 | 44 | func (table *MBCSTable) Decoder() Decoder { 45 | return func(p []byte) (c rune, size int, status Status) { 46 | if len(p) == 0 { 47 | status = NO_ROOM 48 | return 49 | } 50 | 51 | if p[0] == 0 { 52 | return 0, 1, SUCCESS 53 | } 54 | 55 | trie := &table.toUnicode 56 | for trie.char == 0 { 57 | if trie.children == nil { 58 | return 0xfffd, 1, INVALID_CHAR 59 | } 60 | if len(p) < size+1 { 61 | return 0, 0, NO_ROOM 62 | } 63 | 64 | trie = &trie.children[p[size]] 65 | size++ 66 | } 67 | 68 | c = trie.char 69 | status = SUCCESS 70 | return 71 | } 72 | } 73 | 74 | func (table *MBCSTable) Encoder() Encoder { 75 | return func(p []byte, c rune) (size int, status Status) { 76 | bytes := table.fromUnicode[c] 77 | if bytes == "" { 78 | if len(p) > 0 { 79 | p[0] = '?' 80 | return 1, INVALID_CHAR 81 | } else { 82 | return 0, NO_ROOM 83 | } 84 | } 85 | 86 | if len(p) < len(bytes) { 87 | return 0, NO_ROOM 88 | } 89 | 90 | return copy(p, bytes), SUCCESS 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // This file is based on bufio.Reader in the Go standard library, 4 | // which has the following copyright notice: 5 | 6 | // Copyright 2009 The Go Authors. All rights reserved. 7 | // Use of this source code is governed by a BSD-style 8 | // license that can be found in the LICENSE file. 
9 | 10 | import ( 11 | "io" 12 | "unicode/utf8" 13 | ) 14 | 15 | const ( 16 | defaultBufSize = 4096 17 | ) 18 | 19 | // Reader implements character-set decoding for an io.Reader object. 20 | type Reader struct { 21 | buf []byte 22 | rd io.Reader 23 | decode Decoder 24 | r, w int 25 | err error 26 | } 27 | 28 | // NewReader creates a new Reader that uses the receiver to decode text. 29 | func (d Decoder) NewReader(rd io.Reader) *Reader { 30 | b := new(Reader) 31 | b.buf = make([]byte, defaultBufSize) 32 | b.rd = rd 33 | b.decode = d 34 | return b 35 | } 36 | 37 | // fill reads a new chunk into the buffer. 38 | func (b *Reader) fill() { 39 | // Slide existing data to beginning. 40 | if b.r > 0 { 41 | copy(b.buf, b.buf[b.r:b.w]) 42 | b.w -= b.r 43 | b.r = 0 44 | } 45 | 46 | // Read new data. 47 | n, e := b.rd.Read(b.buf[b.w:]) 48 | b.w += n 49 | if e != nil { 50 | b.err = e 51 | } 52 | } 53 | 54 | // Read reads data into p. 55 | // It returns the number of bytes read into p. 56 | // It calls Read at most once on the underlying Reader, 57 | // hence n may be less than len(p). 58 | // At EOF, the count will be zero and err will be os.EOF. 59 | func (b *Reader) Read(p []byte) (n int, err error) { 60 | n = len(p) 61 | filled := false 62 | if n == 0 { 63 | return 0, b.err 64 | } 65 | if b.w == b.r { 66 | if b.err != nil { 67 | return 0, b.err 68 | } 69 | if n > len(b.buf) { 70 | // Large read, empty buffer. 71 | // Allocate a larger buffer for efficiency. 
72 | b.buf = make([]byte, n) 73 | } 74 | b.fill() 75 | filled = true 76 | if b.w == b.r { 77 | return 0, b.err 78 | } 79 | } 80 | 81 | i := 0 82 | for i < n { 83 | rune, size, status := b.decode(b.buf[b.r:b.w]) 84 | 85 | if status == STATE_ONLY { 86 | b.r += size 87 | continue 88 | } 89 | 90 | if status == NO_ROOM { 91 | if b.err != nil { 92 | rune = 0xfffd 93 | size = b.w - b.r 94 | if size == 0 { 95 | break 96 | } 97 | status = INVALID_CHAR 98 | } else if filled { 99 | break 100 | } else { 101 | b.fill() 102 | filled = true 103 | continue 104 | } 105 | } 106 | 107 | if i+utf8.RuneLen(rune) > n { 108 | break 109 | } 110 | 111 | b.r += size 112 | if rune < 128 { 113 | p[i] = byte(rune) 114 | i++ 115 | } else { 116 | i += utf8.EncodeRune(p[i:], rune) 117 | } 118 | } 119 | 120 | return i, nil 121 | } 122 | 123 | // ReadRune reads a single Unicode character and returns the 124 | // rune and its size in bytes. 125 | func (b *Reader) ReadRune() (c rune, size int, err error) { 126 | read: 127 | c, size, status := b.decode(b.buf[b.r:b.w]) 128 | 129 | if status == NO_ROOM && b.err == nil { 130 | b.fill() 131 | goto read 132 | } 133 | 134 | if status == STATE_ONLY { 135 | b.r += size 136 | goto read 137 | } 138 | 139 | if b.r == b.w { 140 | return 0, 0, b.err 141 | } 142 | 143 | if status == NO_ROOM { 144 | c = 0xfffd 145 | size = b.w - b.r 146 | status = INVALID_CHAR 147 | } 148 | 149 | b.r += size 150 | return c, size, nil 151 | } 152 | -------------------------------------------------------------------------------- /shiftjis.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for the Shift-JIS encoding. 
4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "Shift_JIS", 12 | Aliases: []string{"MS_Kanji", "csShiftJIS", "SJIS"}, 13 | NewDecoder: func() Decoder { 14 | sjisOnce.Do(makeSjisTable) 15 | return sjisTable.Decoder() 16 | }, 17 | NewEncoder: func() Encoder { 18 | sjisOnce.Do(makeSjisTable) 19 | return sjisTable.Encoder() 20 | }, 21 | }) 22 | } 23 | 24 | var sjisOnce sync.Once 25 | 26 | var sjisTable MBCSTable 27 | 28 | func makeSjisTable() { 29 | var b [2]byte 30 | 31 | for jis0208, unicode := range jis0208ToUnicode { 32 | if unicode == 0 { 33 | continue 34 | } 35 | 36 | j1 := byte(jis0208 >> 8) 37 | j2 := byte(jis0208) 38 | 39 | if j1 < 95 { 40 | b[0] = (j1+1)/2 + 112 41 | } else { 42 | b[0] = (j1+1)/2 + 176 43 | } 44 | 45 | if j1&1 == 1 { 46 | b[1] = j2 + 31 47 | if j2 >= 96 { 48 | b[1]++ 49 | } 50 | } else { 51 | b[1] = j2 + 126 52 | } 53 | 54 | sjisTable.AddCharacter(rune(unicode), string(b[:])) 55 | } 56 | 57 | for jis0201, unicode := range jis0201ToUnicode { 58 | if unicode == 0 { 59 | continue 60 | } 61 | 62 | sjisTable.AddCharacter(rune(unicode), string(byte(jis0201))) 63 | } 64 | 65 | for i := '\x00'; i < 32; i++ { 66 | sjisTable.AddCharacter(i, string(byte(i))) 67 | } 68 | 69 | sjisTable.AddCharacter(0x7f, "\x7f") 70 | } 71 | -------------------------------------------------------------------------------- /translate.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import "unicode/utf8" 4 | 5 | // Translate enables a Decoder to implement go-charset's Translator interface. 
6 | func (d Decoder) Translate(data []byte, eof bool) (n int, cdata []byte, err error) { 7 | cdata = make([]byte, len(data)+1) 8 | destPos := 0 9 | 10 | for n < len(data) { 11 | rune, size, status := d(data[n:]) 12 | 13 | switch status { 14 | case STATE_ONLY: 15 | n += size 16 | continue 17 | 18 | case NO_ROOM: 19 | if !eof { 20 | return n, cdata[:destPos], nil 21 | } 22 | rune = 0xfffd 23 | n = len(data) 24 | 25 | default: 26 | n += size 27 | } 28 | 29 | if rune < 128 { 30 | if destPos >= len(cdata) { 31 | cdata = doubleLength(cdata) 32 | } 33 | cdata[destPos] = byte(rune) 34 | destPos++ 35 | } else { 36 | if destPos+utf8.RuneLen(rune) > len(cdata) { 37 | cdata = doubleLength(cdata) 38 | } 39 | destPos += utf8.EncodeRune(cdata[destPos:], rune) 40 | } 41 | } 42 | 43 | return n, cdata[:destPos], nil 44 | } 45 | 46 | func doubleLength(b []byte) []byte { 47 | b2 := make([]byte, 2*len(b)) 48 | copy(b2, b) 49 | return b2 50 | } 51 | -------------------------------------------------------------------------------- /utf16.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "unicode/utf16" 5 | ) 6 | 7 | func init() { 8 | for i := 0; i < len(utf16Charsets); i++ { 9 | RegisterCharset(&utf16Charsets[i]) 10 | } 11 | } 12 | 13 | var utf16Charsets = []Charset{ 14 | { 15 | Name: "UTF-16", 16 | NewDecoder: func() Decoder { 17 | var decodeRune Decoder 18 | return func(p []byte) (c rune, size int, status Status) { 19 | if decodeRune == nil { 20 | // haven't read the BOM yet 21 | if len(p) < 2 { 22 | status = NO_ROOM 23 | return 24 | } 25 | 26 | switch { 27 | case p[0] == 0xfe && p[1] == 0xff: 28 | decodeRune = decodeUTF16beRune 29 | return 0, 2, STATE_ONLY 30 | case p[0] == 0xff && p[1] == 0xfe: 31 | decodeRune = decodeUTF16leRune 32 | return 0, 2, STATE_ONLY 33 | default: 34 | decodeRune = decodeUTF16beRune 35 | } 36 | } 37 | 38 | return decodeRune(p) 39 | } 40 | }, 41 | NewEncoder: func() Encoder { 42 | 
wroteBOM := false 43 | return func(p []byte, c rune) (size int, status Status) { 44 | if !wroteBOM { 45 | if len(p) < 2 { 46 | status = NO_ROOM 47 | return 48 | } 49 | 50 | p[0] = 0xfe 51 | p[1] = 0xff 52 | wroteBOM = true 53 | return 2, STATE_ONLY 54 | } 55 | 56 | return encodeUTF16beRune(p, c) 57 | } 58 | }, 59 | }, 60 | { 61 | Name: "UTF-16BE", 62 | NewDecoder: func() Decoder { return decodeUTF16beRune }, 63 | NewEncoder: func() Encoder { return encodeUTF16beRune }, 64 | }, 65 | { 66 | Name: "UTF-16LE", 67 | NewDecoder: func() Decoder { return decodeUTF16leRune }, 68 | NewEncoder: func() Encoder { return encodeUTF16leRune }, 69 | }, 70 | } 71 | 72 | func decodeUTF16beRune(p []byte) (r rune, size int, status Status) { 73 | if len(p) < 2 { 74 | status = NO_ROOM 75 | return 76 | } 77 | 78 | c := rune(p[0])<<8 + rune(p[1]) 79 | 80 | if utf16.IsSurrogate(c) { 81 | if len(p) < 4 { 82 | status = NO_ROOM 83 | return 84 | } 85 | 86 | c2 := rune(p[2])<<8 + rune(p[3]) 87 | c = utf16.DecodeRune(c, c2) 88 | 89 | if c == 0xfffd { 90 | return c, 2, INVALID_CHAR 91 | } else { 92 | return c, 4, SUCCESS 93 | } 94 | } 95 | 96 | return c, 2, SUCCESS 97 | } 98 | 99 | func encodeUTF16beRune(p []byte, c rune) (size int, status Status) { 100 | if c < 0x10000 { 101 | if len(p) < 2 { 102 | status = NO_ROOM 103 | return 104 | } 105 | p[0] = byte(c >> 8) 106 | p[1] = byte(c) 107 | return 2, SUCCESS 108 | } 109 | 110 | if len(p) < 4 { 111 | status = NO_ROOM 112 | return 113 | } 114 | s1, s2 := utf16.EncodeRune(c) 115 | p[0] = byte(s1 >> 8) 116 | p[1] = byte(s1) 117 | p[2] = byte(s2 >> 8) 118 | p[3] = byte(s2) 119 | return 4, SUCCESS 120 | } 121 | 122 | func decodeUTF16leRune(p []byte) (r rune, size int, status Status) { 123 | if len(p) < 2 { 124 | status = NO_ROOM 125 | return 126 | } 127 | 128 | c := rune(p[1])<<8 + rune(p[0]) 129 | 130 | if utf16.IsSurrogate(c) { 131 | if len(p) < 4 { 132 | status = NO_ROOM 133 | return 134 | } 135 | 136 | c2 := rune(p[3])<<8 + rune(p[2]) 137 | c = 
utf16.DecodeRune(c, c2) 138 | 139 | if c == 0xfffd { 140 | return c, 2, INVALID_CHAR 141 | } else { 142 | return c, 4, SUCCESS 143 | } 144 | } 145 | 146 | return c, 2, SUCCESS 147 | } 148 | 149 | func encodeUTF16leRune(p []byte, c rune) (size int, status Status) { 150 | if c < 0x10000 { 151 | if len(p) < 2 { 152 | status = NO_ROOM 153 | return 154 | } 155 | p[1] = byte(c >> 8) 156 | p[0] = byte(c) 157 | return 2, SUCCESS 158 | } 159 | 160 | if len(p) < 4 { 161 | status = NO_ROOM 162 | return 163 | } 164 | s1, s2 := utf16.EncodeRune(c) 165 | p[1] = byte(s1 >> 8) 166 | p[0] = byte(s1) 167 | p[3] = byte(s2 >> 8) 168 | p[2] = byte(s2) 169 | return 4, SUCCESS 170 | } 171 | -------------------------------------------------------------------------------- /utf8.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import "unicode/utf8" 4 | 5 | func init() { 6 | RegisterCharset(&Charset{ 7 | Name: "UTF-8", 8 | NewDecoder: func() Decoder { return decodeUTF8Rune }, 9 | NewEncoder: func() Encoder { return encodeUTF8Rune }, 10 | }) 11 | } 12 | 13 | func decodeUTF8Rune(p []byte) (c rune, size int, status Status) { 14 | if len(p) == 0 { 15 | status = NO_ROOM 16 | return 17 | } 18 | 19 | if p[0] < 128 { 20 | return rune(p[0]), 1, SUCCESS 21 | } 22 | 23 | c, size = utf8.DecodeRune(p) 24 | 25 | if c == 0xfffd { 26 | if utf8.FullRune(p) { 27 | status = INVALID_CHAR 28 | return 29 | } 30 | 31 | return 0, 0, NO_ROOM 32 | } 33 | 34 | status = SUCCESS 35 | return 36 | } 37 | 38 | func encodeUTF8Rune(p []byte, c rune) (size int, status Status) { 39 | size = utf8.RuneLen(c) 40 | if size > len(p) { 41 | return 0, NO_ROOM 42 | } 43 | 44 | return utf8.EncodeRune(p, c), SUCCESS 45 | } 46 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "io" 5 | "unicode/utf8" 6 | ) 7 | 8 
| // Writer implements character-set encoding for an io.Writer object. 9 | type Writer struct { 10 | wr io.Writer 11 | encode Encoder 12 | inbuf []byte 13 | outbuf []byte 14 | } 15 | 16 | // NewWriter creates a new Writer that uses the receiver to encode text. 17 | func (e Encoder) NewWriter(wr io.Writer) *Writer { 18 | w := new(Writer) 19 | w.wr = wr 20 | w.encode = e 21 | return w 22 | } 23 | 24 | // Write encodes and writes the data from p. 25 | func (w *Writer) Write(p []byte) (n int, err error) { 26 | n = len(p) 27 | 28 | if len(w.inbuf) > 0 { 29 | w.inbuf = append(w.inbuf, p...) 30 | p = w.inbuf 31 | } 32 | 33 | if len(w.outbuf) < len(p) { 34 | w.outbuf = make([]byte, len(p)+10) 35 | } 36 | 37 | outpos := 0 38 | 39 | for len(p) > 0 { 40 | rune, size := utf8.DecodeRune(p) 41 | if rune == 0xfffd && !utf8.FullRune(p) { 42 | break 43 | } 44 | 45 | p = p[size:] 46 | 47 | retry: 48 | size, status := w.encode(w.outbuf[outpos:], rune) 49 | 50 | if status == NO_ROOM { 51 | newDest := make([]byte, len(w.outbuf)*2) 52 | copy(newDest, w.outbuf) 53 | w.outbuf = newDest 54 | goto retry 55 | } 56 | 57 | if status == STATE_ONLY { 58 | outpos += size 59 | goto retry 60 | } 61 | 62 | outpos += size 63 | } 64 | 65 | w.inbuf = w.inbuf[:0] 66 | if len(p) > 0 { 67 | w.inbuf = append(w.inbuf, p...) 68 | } 69 | 70 | n1, err := w.wr.Write(w.outbuf[0:outpos]) 71 | 72 | if err != nil && n1 < n { 73 | n = n1 74 | } 75 | 76 | return 77 | } 78 | 79 | func (w *Writer) WriteRune(c rune) (size int, err error) { 80 | if len(w.inbuf) > 0 { 81 | // There are leftover bytes, a partial UTF-8 sequence. 
82 | w.inbuf = w.inbuf[:0] 83 | w.WriteRune(0xfffd) 84 | } 85 | 86 | if w.outbuf == nil { 87 | w.outbuf = make([]byte, 16) 88 | } 89 | 90 | outpos := 0 91 | 92 | retry: 93 | size, status := w.encode(w.outbuf[outpos:], c) 94 | 95 | if status == NO_ROOM { 96 | w.outbuf = make([]byte, len(w.outbuf)*2) 97 | goto retry 98 | } 99 | 100 | if status == STATE_ONLY { 101 | outpos += size 102 | goto retry 103 | } 104 | 105 | outpos += size 106 | 107 | return w.wr.Write(w.outbuf[0:outpos]) 108 | } 109 | --------------------------------------------------------------------------------