├── 8bit.go ├── ASCII.go ├── README.md ├── big5-data.go ├── big5.go ├── charset.go ├── convert_string.go ├── cp51932.go ├── entity.go ├── entity_data.go ├── euc-jp.go ├── euc-kr-data.go ├── euc-kr.go ├── fallback.go ├── gb18030-data.go ├── gb18030.go ├── gbk-data.go ├── gbk.go ├── iso2022jp.go ├── jis0201-data.go ├── jis0208-data.go ├── jis0212-data.go ├── kuten.go ├── mahonia_test.go ├── mahoniconv └── mahoniconv.go ├── mbcs.go ├── ms-jis-data.go ├── reader.go ├── shiftjis-data.go ├── shiftjis.go ├── tcvn3.go ├── translate.go ├── utf16.go ├── utf8.go └── writer.go /ASCII.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for ASCII and ISO-8859-1 4 | 5 | func init() { 6 | for i := 0; i < len(asciiCharsets); i++ { 7 | RegisterCharset(&asciiCharsets[i]) 8 | } 9 | } 10 | 11 | var asciiCharsets = []Charset{ 12 | { 13 | Name: "US-ASCII", 14 | NewDecoder: func() Decoder { return decodeASCIIRune }, 15 | NewEncoder: func() Encoder { return encodeASCIIRune }, 16 | Aliases: []string{"ASCII", "US", "ISO646-US", "IBM367", "cp367", "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "csASCII"}, 17 | }, 18 | { 19 | Name: "ISO-8859-1", 20 | NewDecoder: func() Decoder { return decodeLatin1Rune }, 21 | NewEncoder: func() Encoder { return encodeLatin1Rune }, 22 | Aliases: []string{"latin1", "ISO Latin 1", "IBM819", "cp819", "ISO_8859-1:1987", "iso-ir-100", "l1", "csISOLatin1"}, 23 | }, 24 | } 25 | 26 | func decodeASCIIRune(p []byte) (c rune, size int, status Status) { 27 | if len(p) == 0 { 28 | status = NO_ROOM 29 | return 30 | } 31 | 32 | b := p[0] 33 | if b > 127 { 34 | return 0xfffd, 1, INVALID_CHAR 35 | } 36 | return rune(b), 1, SUCCESS 37 | } 38 | 39 | func encodeASCIIRune(p []byte, c rune) (size int, status Status) { 40 | if len(p) == 0 { 41 | status = NO_ROOM 42 | return 43 | } 44 | 45 | if c < 128 { 46 | p[0] = byte(c) 47 | return 1, SUCCESS 48 | } 49 | 50 | p[0] = '?' 
51 | return 1, INVALID_CHAR 52 | } 53 | 54 | func decodeLatin1Rune(p []byte) (c rune, size int, status Status) { 55 | if len(p) == 0 { 56 | status = NO_ROOM 57 | return 58 | } 59 | 60 | return rune(p[0]), 1, SUCCESS 61 | } 62 | 63 | func encodeLatin1Rune(p []byte, c rune) (size int, status Status) { 64 | if len(p) == 0 { 65 | status = NO_ROOM 66 | return 67 | } 68 | 69 | if c < 256 { 70 | p[0] = byte(c) 71 | return 1, SUCCESS 72 | } 73 | 74 | p[0] = '?' 75 | return 1, INVALID_CHAR 76 | } 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mahonia 2 | ======= 3 | 4 | character-set conversion library implemented in Go. 5 | 6 | Mahonia is a character-set conversion library implemented in Go. 7 | All data is compiled into the executable; it doesn't need any external data files. 8 | 9 | Copy from http://code.google.com/p/mahonia/ 10 | 11 | install 12 | ------- 13 | 14 | go get github.com/henrylee2cn/mahonia 15 | 16 | example 17 | ------- 18 | 19 | ``` 20 | package main 21 | import "fmt" 22 | import "github.com/henrylee2cn/mahonia" 23 | func main(){ 24 | enc:=mahonia.NewEncoder("gbk") 25 | //converts a string from UTF-8 to gbk encoding. 26 | fmt.Println(enc.ConvertString("hello,世界")) 27 | } 28 | ``` 29 | -------------------------------------------------------------------------------- /big5.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for Big 5 encoding. 
4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "Big5", 12 | Aliases: []string{"csBig5"}, 13 | NewDecoder: func() Decoder { 14 | return decodeBig5Rune 15 | }, 16 | NewEncoder: func() Encoder { 17 | big5Once.Do(reverseBig5Table) 18 | return encodeBig5Rune 19 | }, 20 | }) 21 | } 22 | 23 | func decodeBig5Rune(p []byte) (r rune, size int, status Status) { 24 | if len(p) == 0 { 25 | status = NO_ROOM 26 | return 27 | } 28 | 29 | b := p[0] 30 | if b < 128 { 31 | return rune(b), 1, SUCCESS 32 | } 33 | 34 | if len(p) < 2 { 35 | status = NO_ROOM 36 | return 37 | } 38 | 39 | c := int(p[0])<<8 + int(p[1]) 40 | c = int(big5ToUnicode[c]) 41 | if c > 0 { 42 | return rune(c), 2, SUCCESS 43 | } 44 | 45 | return 0xfffd, 1, INVALID_CHAR 46 | } 47 | 48 | func encodeBig5Rune(p []byte, r rune) (size int, status Status) { 49 | if len(p) == 0 { 50 | status = NO_ROOM 51 | return 52 | } 53 | 54 | if r < 128 { 55 | p[0] = byte(r) 56 | return 1, SUCCESS 57 | } 58 | 59 | if len(p) < 2 { 60 | status = NO_ROOM 61 | return 62 | } 63 | 64 | if r < 0x10000 { 65 | c := unicodeToBig5[r] 66 | if c > 0 { 67 | p[0] = byte(c >> 8) 68 | p[1] = byte(c) 69 | return 2, SUCCESS 70 | } 71 | } 72 | 73 | p[0] = '?' 74 | return 1, INVALID_CHAR 75 | } 76 | 77 | var big5Once sync.Once 78 | 79 | var unicodeToBig5 []uint16 80 | 81 | func reverseBig5Table() { 82 | unicodeToBig5 = make([]uint16, 65536) 83 | 84 | for big5, unicode := range big5ToUnicode { 85 | if unicode > 0 { 86 | unicodeToBig5[unicode] = uint16(big5) 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /charset.go: -------------------------------------------------------------------------------- 1 | // This package is a character-set conversion library for Go. 
2 | // 3 | // (DEPRECATED: use code.google.com/p/go.text/encoding, perhaps along with 4 | // code.google.com/p/go.net/html/charset) 5 | package mahonia 6 | 7 | import ( 8 | "bytes" 9 | "unicode" 10 | ) 11 | 12 | // Status is the type for the status return value from a Decoder or Encoder. 13 | type Status int 14 | 15 | const ( 16 | // SUCCESS means that the character was converted with no problems. 17 | SUCCESS = Status(iota) 18 | 19 | // INVALID_CHAR means that the source contained invalid bytes, or that the character 20 | // could not be represented in the destination encoding. 21 | // The Encoder or Decoder should have output a substitute character. 22 | INVALID_CHAR 23 | 24 | // NO_ROOM means there were not enough input bytes to form a complete character, 25 | // or there was not enough room in the output buffer to write a complete character. 26 | // No bytes were written, and no internal state was changed in the Encoder or Decoder. 27 | NO_ROOM 28 | 29 | // STATE_ONLY means that bytes were read or written indicating a state transition, 30 | // but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences) 31 | STATE_ONLY 32 | ) 33 | 34 | // A Decoder is a function that decodes a character set, one character at a time. 35 | // It works much like utf8.DecodeRune, but has an aditional status return value. 36 | type Decoder func(p []byte) (c rune, size int, status Status) 37 | 38 | // An Encoder is a function that encodes a character set, one character at a time. 39 | // It works much like utf8.EncodeRune, but has an additional status return value. 40 | type Encoder func(p []byte, c rune) (size int, status Status) 41 | 42 | // A Charset represents a character set that can be converted, and contains functions 43 | // to create Converters to encode and decode strings in that character set. 44 | type Charset struct { 45 | // Name is the character set's canonical name. 
46 | Name string 47 | 48 | // Aliases returns a list of alternate names. 49 | Aliases []string 50 | 51 | // NewDecoder returns a Decoder to convert from the charset to Unicode. 52 | NewDecoder func() Decoder 53 | 54 | // NewEncoder returns an Encoder to convert from Unicode to the charset. 55 | NewEncoder func() Encoder 56 | } 57 | 58 | // The charsets are stored in charsets under their canonical names. 59 | var charsets = make(map[string]*Charset) 60 | 61 | // aliases maps their aliases to their canonical names. 62 | var aliases = make(map[string]string) 63 | 64 | // simplifyName converts a name to lower case and removes non-alphanumeric characters. 65 | // This is how the names are used as keys to the maps. 66 | func simplifyName(name string) string { 67 | var buf bytes.Buffer 68 | for _, c := range name { 69 | switch { 70 | case unicode.IsDigit(c): 71 | buf.WriteRune(c) 72 | case unicode.IsLetter(c): 73 | buf.WriteRune(unicode.ToLower(c)) 74 | default: 75 | 76 | } 77 | } 78 | 79 | return buf.String() 80 | } 81 | 82 | // RegisterCharset adds a charset to the charsetMap. 83 | func RegisterCharset(cs *Charset) { 84 | name := cs.Name 85 | charsets[name] = cs 86 | aliases[simplifyName(name)] = name 87 | for _, alias := range cs.Aliases { 88 | aliases[simplifyName(alias)] = name 89 | } 90 | } 91 | 92 | // GetCharset fetches a charset by name. 93 | // If the name is not found, it returns nil. 94 | func GetCharset(name string) *Charset { 95 | return charsets[aliases[simplifyName(name)]] 96 | } 97 | 98 | // NewDecoder returns a Decoder to decode the named charset. 99 | // If the name is not found, it returns nil. 100 | func NewDecoder(name string) Decoder { 101 | cs := GetCharset(name) 102 | if cs == nil { 103 | return nil 104 | } 105 | return cs.NewDecoder() 106 | } 107 | 108 | // NewEncoder returns an Encoder to encode the named charset. 
109 | func NewEncoder(name string) Encoder { 110 | cs := GetCharset(name) 111 | if cs == nil { 112 | return nil 113 | } 114 | return cs.NewEncoder() 115 | } 116 | -------------------------------------------------------------------------------- /convert_string.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "unicode/utf8" 5 | ) 6 | 7 | // ConvertString converts a string from UTF-8 to e's encoding. 8 | func (e Encoder) ConvertString(s string) string { 9 | dest := make([]byte, len(s)+10) 10 | destPos := 0 11 | 12 | for _, rune := range s { 13 | retry: 14 | size, status := e(dest[destPos:], rune) 15 | 16 | if status == NO_ROOM { 17 | newDest := make([]byte, len(dest)*2) 18 | copy(newDest, dest) 19 | dest = newDest 20 | goto retry 21 | } 22 | 23 | if status == STATE_ONLY { 24 | destPos += size 25 | goto retry 26 | } 27 | 28 | destPos += size 29 | } 30 | 31 | return string(dest[:destPos]) 32 | } 33 | 34 | // ConvertString converts a string from d's encoding to UTF-8. 35 | func (d Decoder) ConvertString(s string) string { 36 | bytes := []byte(s) 37 | runes := make([]rune, len(s)) 38 | destPos := 0 39 | 40 | for len(bytes) > 0 { 41 | c, size, status := d(bytes) 42 | 43 | if status == STATE_ONLY { 44 | bytes = bytes[size:] 45 | continue 46 | } 47 | 48 | if status == NO_ROOM { 49 | c = 0xfffd 50 | size = len(bytes) 51 | status = INVALID_CHAR 52 | } 53 | 54 | bytes = bytes[size:] 55 | runes[destPos] = c 56 | destPos++ 57 | } 58 | 59 | return string(runes[:destPos]) 60 | } 61 | 62 | // ConvertStringOK converts a string from UTF-8 to e's encoding. It also 63 | // returns a boolean indicating whether every character was converted 64 | // successfully. 65 | func (e Encoder) ConvertStringOK(s string) (result string, ok bool) { 66 | dest := make([]byte, len(s)+10) 67 | destPos := 0 68 | ok = true 69 | 70 | for i, r := range s { 71 | // The following test is copied from utf8.ValidString. 
72 | if r == utf8.RuneError && ok { 73 | _, size := utf8.DecodeRuneInString(s[i:]) 74 | if size == 1 { 75 | ok = false 76 | } 77 | } 78 | 79 | retry: 80 | size, status := e(dest[destPos:], r) 81 | 82 | switch status { 83 | case NO_ROOM: 84 | newDest := make([]byte, len(dest)*2) 85 | copy(newDest, dest) 86 | dest = newDest 87 | goto retry 88 | 89 | case STATE_ONLY: 90 | destPos += size 91 | goto retry 92 | 93 | case INVALID_CHAR: 94 | ok = false 95 | } 96 | 97 | destPos += size 98 | } 99 | 100 | return string(dest[:destPos]), ok 101 | } 102 | 103 | // ConvertStringOK converts a string from d's encoding to UTF-8. 104 | // It also returns a boolean indicating whether every character was converted 105 | // successfully. 106 | func (d Decoder) ConvertStringOK(s string) (result string, ok bool) { 107 | bytes := []byte(s) 108 | runes := make([]rune, len(s)) 109 | destPos := 0 110 | ok = true 111 | 112 | for len(bytes) > 0 { 113 | c, size, status := d(bytes) 114 | 115 | switch status { 116 | case STATE_ONLY: 117 | bytes = bytes[size:] 118 | continue 119 | 120 | case NO_ROOM: 121 | c = 0xfffd 122 | size = len(bytes) 123 | ok = false 124 | 125 | case INVALID_CHAR: 126 | ok = false 127 | } 128 | 129 | bytes = bytes[size:] 130 | runes[destPos] = c 131 | destPos++ 132 | } 133 | 134 | return string(runes[:destPos]), ok 135 | } 136 | -------------------------------------------------------------------------------- /cp51932.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "unicode/utf8" 5 | ) 6 | 7 | // Converters for Microsoft's version of the EUC-JP encoding 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "cp51932", 12 | Aliases: []string{"windows-51932"}, 13 | NewDecoder: func() Decoder { 14 | return decodeCP51932 15 | }, 16 | NewEncoder: func() Encoder { 17 | msJISTable.Reverse() 18 | return encodeCP51932 19 | }, 20 | }) 21 | } 22 | 23 | func decodeCP51932(p []byte) (c rune, size int, 
status Status) { 24 | if len(p) == 0 { 25 | return 0, 0, NO_ROOM 26 | } 27 | 28 | b := p[0] 29 | switch { 30 | case b < 0x80: 31 | return rune(b), 1, SUCCESS 32 | 33 | case b == 0x8e: 34 | if len(p) < 2 { 35 | return 0, 0, NO_ROOM 36 | } 37 | b2 := p[1] 38 | if b2 < 0xa1 || b2 > 0xdf { 39 | return utf8.RuneError, 1, INVALID_CHAR 40 | } 41 | return rune(b2) + (0xff61 - 0xa1), 2, SUCCESS 42 | 43 | case 0xa1 <= b && b <= 0xfe: 44 | return msJISTable.DecodeHigh(p) 45 | } 46 | 47 | return utf8.RuneError, 1, INVALID_CHAR 48 | } 49 | 50 | func encodeCP51932(p []byte, c rune) (size int, status Status) { 51 | if len(p) == 0 { 52 | return 0, NO_ROOM 53 | } 54 | 55 | if c < 0x80 { 56 | p[0] = byte(c) 57 | return 1, SUCCESS 58 | } 59 | 60 | if len(p) < 2 { 61 | return 0, NO_ROOM 62 | } 63 | 64 | if c > 0xffff { 65 | p[0] = '?' 66 | return 1, INVALID_CHAR 67 | } 68 | 69 | if 0xff61 <= c && c <= 0xff9f { 70 | p[0] = 0x8e 71 | p[1] = byte(c - (0xff61 - 0xa1)) 72 | return 2, SUCCESS 73 | } 74 | 75 | return msJISTable.EncodeHigh(p, c) 76 | } 77 | -------------------------------------------------------------------------------- /entity.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // decoding HTML entities 4 | 5 | import ( 6 | "sort" 7 | ) 8 | 9 | // EntityDecoder returns a Decoder that decodes HTML character entities. 10 | // If there is no valid character entity at the current position, it returns INVALID_CHAR. 11 | // So it needs to be combined with another Decoder via FallbackDecoder. 
12 | func EntityDecoder() Decoder { 13 | var leftover rune // leftover rune from two-rune entity 14 | return func(p []byte) (r rune, size int, status Status) { 15 | if leftover != 0 { 16 | r = leftover 17 | leftover = 0 18 | return r, 0, SUCCESS 19 | } 20 | 21 | if len(p) == 0 { 22 | return 0, 0, NO_ROOM 23 | } 24 | 25 | if p[0] != '&' { 26 | return 0xfffd, 1, INVALID_CHAR 27 | } 28 | 29 | if len(p) < 3 { 30 | return 0, 1, NO_ROOM 31 | } 32 | 33 | r, size, status = 0xfffd, 1, INVALID_CHAR 34 | n := 1 // number of bytes read so far 35 | 36 | if p[n] == '#' { 37 | n++ 38 | c := p[n] 39 | hex := false 40 | if c == 'x' || c == 'X' { 41 | hex = true 42 | n++ 43 | } 44 | 45 | var x rune 46 | for n < len(p) { 47 | c = p[n] 48 | n++ 49 | if hex { 50 | if '0' <= c && c <= '9' { 51 | x = 16*x + rune(c) - '0' 52 | continue 53 | } else if 'a' <= c && c <= 'f' { 54 | x = 16*x + rune(c) - 'a' + 10 55 | continue 56 | } else if 'A' <= c && c <= 'F' { 57 | x = 16*x + rune(c) - 'A' + 10 58 | continue 59 | } 60 | } else if '0' <= c && c <= '9' { 61 | x = 10*x + rune(c) - '0' 62 | continue 63 | } 64 | if c != ';' { 65 | n-- 66 | } 67 | break 68 | } 69 | 70 | if n == len(p) && p[n-1] != ';' { 71 | return 0, 0, NO_ROOM 72 | } 73 | 74 | size = n 75 | if p[n-1] == ';' { 76 | n-- 77 | } 78 | if hex { 79 | n-- 80 | } 81 | n-- 82 | // Now n is the number of actual digits read. 83 | if n == 0 { 84 | return 0xfffd, 1, INVALID_CHAR 85 | } 86 | 87 | if 0x80 <= x && x <= 0x9F { 88 | // Replace characters from Windows-1252 with UTF-8 equivalents. 89 | x = replacementTable[x-0x80] 90 | } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { 91 | // Replace invalid characters with the replacement character. 92 | return 0xfffd, size, INVALID_CHAR 93 | } 94 | 95 | r = x 96 | status = SUCCESS 97 | return 98 | } 99 | 100 | // Look for a named entity in EntityList. 
101 | 102 | possible := entityList 103 | for len(possible) > 0 { 104 | if len(p) <= n { 105 | leftover = 0 106 | return 0, 0, NO_ROOM 107 | } 108 | 109 | c := p[n] 110 | 111 | // Narrow down the selection in possible to those items that have c in the 112 | // appropriate byte. 113 | first := sort.Search(len(possible), func(i int) bool { 114 | e := possible[i].name 115 | if len(e) < n { 116 | return false 117 | } 118 | return e[n-1] >= c 119 | }) 120 | possible = possible[first:] 121 | last := sort.Search(len(possible), func(i int) bool { 122 | return possible[i].name[n-1] > c 123 | }) 124 | possible = possible[:last] 125 | 126 | n++ 127 | if len(possible) > 0 && len(possible[0].name) == n-1 { 128 | r, leftover = possible[0].r1, possible[0].r2 129 | size = n 130 | status = SUCCESS 131 | // but don't return yet, since we need the longest match 132 | } 133 | } 134 | 135 | return 136 | } 137 | } 138 | 139 | // This table is copied from /src/pkg/html/escape.go in the Go source 140 | // 141 | // These replacements permit compatibility with old numeric entities that 142 | // assumed Windows-1252 encoding. 143 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference 144 | var replacementTable = [...]rune{ 145 | '\u20AC', // First entry is what 0x80 should be replaced with. 146 | '\u0081', 147 | '\u201A', 148 | '\u0192', 149 | '\u201E', 150 | '\u2026', 151 | '\u2020', 152 | '\u2021', 153 | '\u02C6', 154 | '\u2030', 155 | '\u0160', 156 | '\u2039', 157 | '\u0152', 158 | '\u008D', 159 | '\u017D', 160 | '\u008F', 161 | '\u0090', 162 | '\u2018', 163 | '\u2019', 164 | '\u201C', 165 | '\u201D', 166 | '\u2022', 167 | '\u2013', 168 | '\u2014', 169 | '\u02DC', 170 | '\u2122', 171 | '\u0161', 172 | '\u203A', 173 | '\u0153', 174 | '\u009D', 175 | '\u017E', 176 | '\u0178', // Last entry is 0x9F. 177 | // 0x00->'\uFFFD' is handled programmatically. 178 | // 0x0D->'\u000D' is a no-op. 
179 | } 180 | -------------------------------------------------------------------------------- /entity_data.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Taken from /src/pkg/html/entity.go in the Go source code. 4 | 5 | // Copyright 2010 The Go Authors. All rights reserved. 6 | // Use of this source code is governed by a BSD-style 7 | // license that can be found in the LICENSE file. 8 | 9 | //package html 10 | 11 | // entityList is a list of HTML entity names and their values. The semicolon matters: 12 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html 13 | // lists both "amp" and "amp;" as two separate entries. 14 | // 15 | // Most entities translate to a single rune (r1); a few translate to two (r1 and r2). 16 | // 17 | // Note that the HTML5 list is larger than the HTML4 list at 18 | // http://www.w3.org/TR/html4/sgml/entities.html 19 | var entityList = []struct { 20 | name string 21 | r1, r2 rune 22 | }{ 23 | {"AElig", '\U000000C6', 0}, 24 | {"AElig;", '\U000000C6', 0}, 25 | {"AMP", '\U00000026', 0}, 26 | {"AMP;", '\U00000026', 0}, 27 | {"Aacute", '\U000000C1', 0}, 28 | {"Aacute;", '\U000000C1', 0}, 29 | {"Abreve;", '\U00000102', 0}, 30 | {"Acirc", '\U000000C2', 0}, 31 | {"Acirc;", '\U000000C2', 0}, 32 | {"Acy;", '\U00000410', 0}, 33 | {"Afr;", '\U0001D504', 0}, 34 | {"Agrave", '\U000000C0', 0}, 35 | {"Agrave;", '\U000000C0', 0}, 36 | {"Alpha;", '\U00000391', 0}, 37 | {"Amacr;", '\U00000100', 0}, 38 | {"And;", '\U00002A53', 0}, 39 | {"Aogon;", '\U00000104', 0}, 40 | {"Aopf;", '\U0001D538', 0}, 41 | {"ApplyFunction;", '\U00002061', 0}, 42 | {"Aring", '\U000000C5', 0}, 43 | {"Aring;", '\U000000C5', 0}, 44 | {"Ascr;", '\U0001D49C', 0}, 45 | {"Assign;", '\U00002254', 0}, 46 | {"Atilde", '\U000000C3', 0}, 47 | {"Atilde;", '\U000000C3', 0}, 48 | {"Auml", '\U000000C4', 0}, 49 | {"Auml;", '\U000000C4', 0}, 50 | {"Backslash;", '\U00002216', 0}, 51 | 
{"Barv;", '\U00002AE7', 0}, 52 | {"Barwed;", '\U00002306', 0}, 53 | {"Bcy;", '\U00000411', 0}, 54 | {"Because;", '\U00002235', 0}, 55 | {"Bernoullis;", '\U0000212C', 0}, 56 | {"Beta;", '\U00000392', 0}, 57 | {"Bfr;", '\U0001D505', 0}, 58 | {"Bopf;", '\U0001D539', 0}, 59 | {"Breve;", '\U000002D8', 0}, 60 | {"Bscr;", '\U0000212C', 0}, 61 | {"Bumpeq;", '\U0000224E', 0}, 62 | {"CHcy;", '\U00000427', 0}, 63 | {"COPY", '\U000000A9', 0}, 64 | {"COPY;", '\U000000A9', 0}, 65 | {"Cacute;", '\U00000106', 0}, 66 | {"Cap;", '\U000022D2', 0}, 67 | {"CapitalDifferentialD;", '\U00002145', 0}, 68 | {"Cayleys;", '\U0000212D', 0}, 69 | {"Ccaron;", '\U0000010C', 0}, 70 | {"Ccedil", '\U000000C7', 0}, 71 | {"Ccedil;", '\U000000C7', 0}, 72 | {"Ccirc;", '\U00000108', 0}, 73 | {"Cconint;", '\U00002230', 0}, 74 | {"Cdot;", '\U0000010A', 0}, 75 | {"Cedilla;", '\U000000B8', 0}, 76 | {"CenterDot;", '\U000000B7', 0}, 77 | {"Cfr;", '\U0000212D', 0}, 78 | {"Chi;", '\U000003A7', 0}, 79 | {"CircleDot;", '\U00002299', 0}, 80 | {"CircleMinus;", '\U00002296', 0}, 81 | {"CirclePlus;", '\U00002295', 0}, 82 | {"CircleTimes;", '\U00002297', 0}, 83 | {"ClockwiseContourIntegral;", '\U00002232', 0}, 84 | {"CloseCurlyDoubleQuote;", '\U0000201D', 0}, 85 | {"CloseCurlyQuote;", '\U00002019', 0}, 86 | {"Colon;", '\U00002237', 0}, 87 | {"Colone;", '\U00002A74', 0}, 88 | {"Congruent;", '\U00002261', 0}, 89 | {"Conint;", '\U0000222F', 0}, 90 | {"ContourIntegral;", '\U0000222E', 0}, 91 | {"Copf;", '\U00002102', 0}, 92 | {"Coproduct;", '\U00002210', 0}, 93 | {"CounterClockwiseContourIntegral;", '\U00002233', 0}, 94 | {"Cross;", '\U00002A2F', 0}, 95 | {"Cscr;", '\U0001D49E', 0}, 96 | {"Cup;", '\U000022D3', 0}, 97 | {"CupCap;", '\U0000224D', 0}, 98 | {"DD;", '\U00002145', 0}, 99 | {"DDotrahd;", '\U00002911', 0}, 100 | {"DJcy;", '\U00000402', 0}, 101 | {"DScy;", '\U00000405', 0}, 102 | {"DZcy;", '\U0000040F', 0}, 103 | {"Dagger;", '\U00002021', 0}, 104 | {"Darr;", '\U000021A1', 0}, 105 | {"Dashv;", '\U00002AE4', 0}, 106 
| {"Dcaron;", '\U0000010E', 0}, 107 | {"Dcy;", '\U00000414', 0}, 108 | {"Del;", '\U00002207', 0}, 109 | {"Delta;", '\U00000394', 0}, 110 | {"Dfr;", '\U0001D507', 0}, 111 | {"DiacriticalAcute;", '\U000000B4', 0}, 112 | {"DiacriticalDot;", '\U000002D9', 0}, 113 | {"DiacriticalDoubleAcute;", '\U000002DD', 0}, 114 | {"DiacriticalGrave;", '\U00000060', 0}, 115 | {"DiacriticalTilde;", '\U000002DC', 0}, 116 | {"Diamond;", '\U000022C4', 0}, 117 | {"DifferentialD;", '\U00002146', 0}, 118 | {"Dopf;", '\U0001D53B', 0}, 119 | {"Dot;", '\U000000A8', 0}, 120 | {"DotDot;", '\U000020DC', 0}, 121 | {"DotEqual;", '\U00002250', 0}, 122 | {"DoubleContourIntegral;", '\U0000222F', 0}, 123 | {"DoubleDot;", '\U000000A8', 0}, 124 | {"DoubleDownArrow;", '\U000021D3', 0}, 125 | {"DoubleLeftArrow;", '\U000021D0', 0}, 126 | {"DoubleLeftRightArrow;", '\U000021D4', 0}, 127 | {"DoubleLeftTee;", '\U00002AE4', 0}, 128 | {"DoubleLongLeftArrow;", '\U000027F8', 0}, 129 | {"DoubleLongLeftRightArrow;", '\U000027FA', 0}, 130 | {"DoubleLongRightArrow;", '\U000027F9', 0}, 131 | {"DoubleRightArrow;", '\U000021D2', 0}, 132 | {"DoubleRightTee;", '\U000022A8', 0}, 133 | {"DoubleUpArrow;", '\U000021D1', 0}, 134 | {"DoubleUpDownArrow;", '\U000021D5', 0}, 135 | {"DoubleVerticalBar;", '\U00002225', 0}, 136 | {"DownArrow;", '\U00002193', 0}, 137 | {"DownArrowBar;", '\U00002913', 0}, 138 | {"DownArrowUpArrow;", '\U000021F5', 0}, 139 | {"DownBreve;", '\U00000311', 0}, 140 | {"DownLeftRightVector;", '\U00002950', 0}, 141 | {"DownLeftTeeVector;", '\U0000295E', 0}, 142 | {"DownLeftVector;", '\U000021BD', 0}, 143 | {"DownLeftVectorBar;", '\U00002956', 0}, 144 | {"DownRightTeeVector;", '\U0000295F', 0}, 145 | {"DownRightVector;", '\U000021C1', 0}, 146 | {"DownRightVectorBar;", '\U00002957', 0}, 147 | {"DownTee;", '\U000022A4', 0}, 148 | {"DownTeeArrow;", '\U000021A7', 0}, 149 | {"Downarrow;", '\U000021D3', 0}, 150 | {"Dscr;", '\U0001D49F', 0}, 151 | {"Dstrok;", '\U00000110', 0}, 152 | {"ENG;", '\U0000014A', 0}, 153 | 
{"ETH", '\U000000D0', 0}, 154 | {"ETH;", '\U000000D0', 0}, 155 | {"Eacute", '\U000000C9', 0}, 156 | {"Eacute;", '\U000000C9', 0}, 157 | {"Ecaron;", '\U0000011A', 0}, 158 | {"Ecirc", '\U000000CA', 0}, 159 | {"Ecirc;", '\U000000CA', 0}, 160 | {"Ecy;", '\U0000042D', 0}, 161 | {"Edot;", '\U00000116', 0}, 162 | {"Efr;", '\U0001D508', 0}, 163 | {"Egrave", '\U000000C8', 0}, 164 | {"Egrave;", '\U000000C8', 0}, 165 | {"Element;", '\U00002208', 0}, 166 | {"Emacr;", '\U00000112', 0}, 167 | {"EmptySmallSquare;", '\U000025FB', 0}, 168 | {"EmptyVerySmallSquare;", '\U000025AB', 0}, 169 | {"Eogon;", '\U00000118', 0}, 170 | {"Eopf;", '\U0001D53C', 0}, 171 | {"Epsilon;", '\U00000395', 0}, 172 | {"Equal;", '\U00002A75', 0}, 173 | {"EqualTilde;", '\U00002242', 0}, 174 | {"Equilibrium;", '\U000021CC', 0}, 175 | {"Escr;", '\U00002130', 0}, 176 | {"Esim;", '\U00002A73', 0}, 177 | {"Eta;", '\U00000397', 0}, 178 | {"Euml", '\U000000CB', 0}, 179 | {"Euml;", '\U000000CB', 0}, 180 | {"Exists;", '\U00002203', 0}, 181 | {"ExponentialE;", '\U00002147', 0}, 182 | {"Fcy;", '\U00000424', 0}, 183 | {"Ffr;", '\U0001D509', 0}, 184 | {"FilledSmallSquare;", '\U000025FC', 0}, 185 | {"FilledVerySmallSquare;", '\U000025AA', 0}, 186 | {"Fopf;", '\U0001D53D', 0}, 187 | {"ForAll;", '\U00002200', 0}, 188 | {"Fouriertrf;", '\U00002131', 0}, 189 | {"Fscr;", '\U00002131', 0}, 190 | {"GJcy;", '\U00000403', 0}, 191 | {"GT", '\U0000003E', 0}, 192 | {"GT;", '\U0000003E', 0}, 193 | {"Gamma;", '\U00000393', 0}, 194 | {"Gammad;", '\U000003DC', 0}, 195 | {"Gbreve;", '\U0000011E', 0}, 196 | {"Gcedil;", '\U00000122', 0}, 197 | {"Gcirc;", '\U0000011C', 0}, 198 | {"Gcy;", '\U00000413', 0}, 199 | {"Gdot;", '\U00000120', 0}, 200 | {"Gfr;", '\U0001D50A', 0}, 201 | {"Gg;", '\U000022D9', 0}, 202 | {"Gopf;", '\U0001D53E', 0}, 203 | {"GreaterEqual;", '\U00002265', 0}, 204 | {"GreaterEqualLess;", '\U000022DB', 0}, 205 | {"GreaterFullEqual;", '\U00002267', 0}, 206 | {"GreaterGreater;", '\U00002AA2', 0}, 207 | {"GreaterLess;", 
'\U00002277', 0}, 208 | {"GreaterSlantEqual;", '\U00002A7E', 0}, 209 | {"GreaterTilde;", '\U00002273', 0}, 210 | {"Gscr;", '\U0001D4A2', 0}, 211 | {"Gt;", '\U0000226B', 0}, 212 | {"HARDcy;", '\U0000042A', 0}, 213 | {"Hacek;", '\U000002C7', 0}, 214 | {"Hat;", '\U0000005E', 0}, 215 | {"Hcirc;", '\U00000124', 0}, 216 | {"Hfr;", '\U0000210C', 0}, 217 | {"HilbertSpace;", '\U0000210B', 0}, 218 | {"Hopf;", '\U0000210D', 0}, 219 | {"HorizontalLine;", '\U00002500', 0}, 220 | {"Hscr;", '\U0000210B', 0}, 221 | {"Hstrok;", '\U00000126', 0}, 222 | {"HumpDownHump;", '\U0000224E', 0}, 223 | {"HumpEqual;", '\U0000224F', 0}, 224 | {"IEcy;", '\U00000415', 0}, 225 | {"IJlig;", '\U00000132', 0}, 226 | {"IOcy;", '\U00000401', 0}, 227 | {"Iacute", '\U000000CD', 0}, 228 | {"Iacute;", '\U000000CD', 0}, 229 | {"Icirc", '\U000000CE', 0}, 230 | {"Icirc;", '\U000000CE', 0}, 231 | {"Icy;", '\U00000418', 0}, 232 | {"Idot;", '\U00000130', 0}, 233 | {"Ifr;", '\U00002111', 0}, 234 | {"Igrave", '\U000000CC', 0}, 235 | {"Igrave;", '\U000000CC', 0}, 236 | {"Im;", '\U00002111', 0}, 237 | {"Imacr;", '\U0000012A', 0}, 238 | {"ImaginaryI;", '\U00002148', 0}, 239 | {"Implies;", '\U000021D2', 0}, 240 | {"Int;", '\U0000222C', 0}, 241 | {"Integral;", '\U0000222B', 0}, 242 | {"Intersection;", '\U000022C2', 0}, 243 | {"InvisibleComma;", '\U00002063', 0}, 244 | {"InvisibleTimes;", '\U00002062', 0}, 245 | {"Iogon;", '\U0000012E', 0}, 246 | {"Iopf;", '\U0001D540', 0}, 247 | {"Iota;", '\U00000399', 0}, 248 | {"Iscr;", '\U00002110', 0}, 249 | {"Itilde;", '\U00000128', 0}, 250 | {"Iukcy;", '\U00000406', 0}, 251 | {"Iuml", '\U000000CF', 0}, 252 | {"Iuml;", '\U000000CF', 0}, 253 | {"Jcirc;", '\U00000134', 0}, 254 | {"Jcy;", '\U00000419', 0}, 255 | {"Jfr;", '\U0001D50D', 0}, 256 | {"Jopf;", '\U0001D541', 0}, 257 | {"Jscr;", '\U0001D4A5', 0}, 258 | {"Jsercy;", '\U00000408', 0}, 259 | {"Jukcy;", '\U00000404', 0}, 260 | {"KHcy;", '\U00000425', 0}, 261 | {"KJcy;", '\U0000040C', 0}, 262 | {"Kappa;", '\U0000039A', 0}, 263 | 
{"Kcedil;", '\U00000136', 0}, 264 | {"Kcy;", '\U0000041A', 0}, 265 | {"Kfr;", '\U0001D50E', 0}, 266 | {"Kopf;", '\U0001D542', 0}, 267 | {"Kscr;", '\U0001D4A6', 0}, 268 | {"LJcy;", '\U00000409', 0}, 269 | {"LT", '\U0000003C', 0}, 270 | {"LT;", '\U0000003C', 0}, 271 | {"Lacute;", '\U00000139', 0}, 272 | {"Lambda;", '\U0000039B', 0}, 273 | {"Lang;", '\U000027EA', 0}, 274 | {"Laplacetrf;", '\U00002112', 0}, 275 | {"Larr;", '\U0000219E', 0}, 276 | {"Lcaron;", '\U0000013D', 0}, 277 | {"Lcedil;", '\U0000013B', 0}, 278 | {"Lcy;", '\U0000041B', 0}, 279 | {"LeftAngleBracket;", '\U000027E8', 0}, 280 | {"LeftArrow;", '\U00002190', 0}, 281 | {"LeftArrowBar;", '\U000021E4', 0}, 282 | {"LeftArrowRightArrow;", '\U000021C6', 0}, 283 | {"LeftCeiling;", '\U00002308', 0}, 284 | {"LeftDoubleBracket;", '\U000027E6', 0}, 285 | {"LeftDownTeeVector;", '\U00002961', 0}, 286 | {"LeftDownVector;", '\U000021C3', 0}, 287 | {"LeftDownVectorBar;", '\U00002959', 0}, 288 | {"LeftFloor;", '\U0000230A', 0}, 289 | {"LeftRightArrow;", '\U00002194', 0}, 290 | {"LeftRightVector;", '\U0000294E', 0}, 291 | {"LeftTee;", '\U000022A3', 0}, 292 | {"LeftTeeArrow;", '\U000021A4', 0}, 293 | {"LeftTeeVector;", '\U0000295A', 0}, 294 | {"LeftTriangle;", '\U000022B2', 0}, 295 | {"LeftTriangleBar;", '\U000029CF', 0}, 296 | {"LeftTriangleEqual;", '\U000022B4', 0}, 297 | {"LeftUpDownVector;", '\U00002951', 0}, 298 | {"LeftUpTeeVector;", '\U00002960', 0}, 299 | {"LeftUpVector;", '\U000021BF', 0}, 300 | {"LeftUpVectorBar;", '\U00002958', 0}, 301 | {"LeftVector;", '\U000021BC', 0}, 302 | {"LeftVectorBar;", '\U00002952', 0}, 303 | {"Leftarrow;", '\U000021D0', 0}, 304 | {"Leftrightarrow;", '\U000021D4', 0}, 305 | {"LessEqualGreater;", '\U000022DA', 0}, 306 | {"LessFullEqual;", '\U00002266', 0}, 307 | {"LessGreater;", '\U00002276', 0}, 308 | {"LessLess;", '\U00002AA1', 0}, 309 | {"LessSlantEqual;", '\U00002A7D', 0}, 310 | {"LessTilde;", '\U00002272', 0}, 311 | {"Lfr;", '\U0001D50F', 0}, 312 | {"Ll;", '\U000022D8', 0}, 313 | 
{"Lleftarrow;", '\U000021DA', 0}, 314 | {"Lmidot;", '\U0000013F', 0}, 315 | {"LongLeftArrow;", '\U000027F5', 0}, 316 | {"LongLeftRightArrow;", '\U000027F7', 0}, 317 | {"LongRightArrow;", '\U000027F6', 0}, 318 | {"Longleftarrow;", '\U000027F8', 0}, 319 | {"Longleftrightarrow;", '\U000027FA', 0}, 320 | {"Longrightarrow;", '\U000027F9', 0}, 321 | {"Lopf;", '\U0001D543', 0}, 322 | {"LowerLeftArrow;", '\U00002199', 0}, 323 | {"LowerRightArrow;", '\U00002198', 0}, 324 | {"Lscr;", '\U00002112', 0}, 325 | {"Lsh;", '\U000021B0', 0}, 326 | {"Lstrok;", '\U00000141', 0}, 327 | {"Lt;", '\U0000226A', 0}, 328 | {"Map;", '\U00002905', 0}, 329 | {"Mcy;", '\U0000041C', 0}, 330 | {"MediumSpace;", '\U0000205F', 0}, 331 | {"Mellintrf;", '\U00002133', 0}, 332 | {"Mfr;", '\U0001D510', 0}, 333 | {"MinusPlus;", '\U00002213', 0}, 334 | {"Mopf;", '\U0001D544', 0}, 335 | {"Mscr;", '\U00002133', 0}, 336 | {"Mu;", '\U0000039C', 0}, 337 | {"NJcy;", '\U0000040A', 0}, 338 | {"Nacute;", '\U00000143', 0}, 339 | {"Ncaron;", '\U00000147', 0}, 340 | {"Ncedil;", '\U00000145', 0}, 341 | {"Ncy;", '\U0000041D', 0}, 342 | {"NegativeMediumSpace;", '\U0000200B', 0}, 343 | {"NegativeThickSpace;", '\U0000200B', 0}, 344 | {"NegativeThinSpace;", '\U0000200B', 0}, 345 | {"NegativeVeryThinSpace;", '\U0000200B', 0}, 346 | {"NestedGreaterGreater;", '\U0000226B', 0}, 347 | {"NestedLessLess;", '\U0000226A', 0}, 348 | {"NewLine;", '\U0000000A', 0}, 349 | {"Nfr;", '\U0001D511', 0}, 350 | {"NoBreak;", '\U00002060', 0}, 351 | {"NonBreakingSpace;", '\U000000A0', 0}, 352 | {"Nopf;", '\U00002115', 0}, 353 | {"Not;", '\U00002AEC', 0}, 354 | {"NotCongruent;", '\U00002262', 0}, 355 | {"NotCupCap;", '\U0000226D', 0}, 356 | {"NotDoubleVerticalBar;", '\U00002226', 0}, 357 | {"NotElement;", '\U00002209', 0}, 358 | {"NotEqual;", '\U00002260', 0}, 359 | {"NotEqualTilde;", '\u2242', '\u0338'}, 360 | {"NotExists;", '\U00002204', 0}, 361 | {"NotGreater;", '\U0000226F', 0}, 362 | {"NotGreaterEqual;", '\U00002271', 0}, 363 | 
{"NotGreaterFullEqual;", '\u2267', '\u0338'}, 364 | {"NotGreaterGreater;", '\u226B', '\u0338'}, 365 | {"NotGreaterLess;", '\U00002279', 0}, 366 | {"NotGreaterSlantEqual;", '\u2A7E', '\u0338'}, 367 | {"NotGreaterTilde;", '\U00002275', 0}, 368 | {"NotHumpDownHump;", '\u224E', '\u0338'}, 369 | {"NotHumpEqual;", '\u224F', '\u0338'}, 370 | {"NotLeftTriangle;", '\U000022EA', 0}, 371 | {"NotLeftTriangleBar;", '\u29CF', '\u0338'}, 372 | {"NotLeftTriangleEqual;", '\U000022EC', 0}, 373 | {"NotLess;", '\U0000226E', 0}, 374 | {"NotLessEqual;", '\U00002270', 0}, 375 | {"NotLessGreater;", '\U00002278', 0}, 376 | {"NotLessLess;", '\u226A', '\u0338'}, 377 | {"NotLessSlantEqual;", '\u2A7D', '\u0338'}, 378 | {"NotLessTilde;", '\U00002274', 0}, 379 | {"NotNestedGreaterGreater;", '\u2AA2', '\u0338'}, 380 | {"NotNestedLessLess;", '\u2AA1', '\u0338'}, 381 | {"NotPrecedes;", '\U00002280', 0}, 382 | {"NotPrecedesEqual;", '\u2AAF', '\u0338'}, 383 | {"NotPrecedesSlantEqual;", '\U000022E0', 0}, 384 | {"NotReverseElement;", '\U0000220C', 0}, 385 | {"NotRightTriangle;", '\U000022EB', 0}, 386 | {"NotRightTriangleBar;", '\u29D0', '\u0338'}, 387 | {"NotRightTriangleEqual;", '\U000022ED', 0}, 388 | {"NotSquareSubset;", '\u228F', '\u0338'}, 389 | {"NotSquareSubsetEqual;", '\U000022E2', 0}, 390 | {"NotSquareSuperset;", '\u2290', '\u0338'}, 391 | {"NotSquareSupersetEqual;", '\U000022E3', 0}, 392 | {"NotSubset;", '\u2282', '\u20D2'}, 393 | {"NotSubsetEqual;", '\U00002288', 0}, 394 | {"NotSucceeds;", '\U00002281', 0}, 395 | {"NotSucceedsEqual;", '\u2AB0', '\u0338'}, 396 | {"NotSucceedsSlantEqual;", '\U000022E1', 0}, 397 | {"NotSucceedsTilde;", '\u227F', '\u0338'}, 398 | {"NotSuperset;", '\u2283', '\u20D2'}, 399 | {"NotSupersetEqual;", '\U00002289', 0}, 400 | {"NotTilde;", '\U00002241', 0}, 401 | {"NotTildeEqual;", '\U00002244', 0}, 402 | {"NotTildeFullEqual;", '\U00002247', 0}, 403 | {"NotTildeTilde;", '\U00002249', 0}, 404 | {"NotVerticalBar;", '\U00002224', 0}, 405 | {"Nscr;", '\U0001D4A9', 0}, 406 | 
{"Ntilde", '\U000000D1', 0}, 407 | {"Ntilde;", '\U000000D1', 0}, 408 | {"Nu;", '\U0000039D', 0}, 409 | {"OElig;", '\U00000152', 0}, 410 | {"Oacute", '\U000000D3', 0}, 411 | {"Oacute;", '\U000000D3', 0}, 412 | {"Ocirc", '\U000000D4', 0}, 413 | {"Ocirc;", '\U000000D4', 0}, 414 | {"Ocy;", '\U0000041E', 0}, 415 | {"Odblac;", '\U00000150', 0}, 416 | {"Ofr;", '\U0001D512', 0}, 417 | {"Ograve", '\U000000D2', 0}, 418 | {"Ograve;", '\U000000D2', 0}, 419 | {"Omacr;", '\U0000014C', 0}, 420 | {"Omega;", '\U000003A9', 0}, 421 | {"Omicron;", '\U0000039F', 0}, 422 | {"Oopf;", '\U0001D546', 0}, 423 | {"OpenCurlyDoubleQuote;", '\U0000201C', 0}, 424 | {"OpenCurlyQuote;", '\U00002018', 0}, 425 | {"Or;", '\U00002A54', 0}, 426 | {"Oscr;", '\U0001D4AA', 0}, 427 | {"Oslash", '\U000000D8', 0}, 428 | {"Oslash;", '\U000000D8', 0}, 429 | {"Otilde", '\U000000D5', 0}, 430 | {"Otilde;", '\U000000D5', 0}, 431 | {"Otimes;", '\U00002A37', 0}, 432 | {"Ouml", '\U000000D6', 0}, 433 | {"Ouml;", '\U000000D6', 0}, 434 | {"OverBar;", '\U0000203E', 0}, 435 | {"OverBrace;", '\U000023DE', 0}, 436 | {"OverBracket;", '\U000023B4', 0}, 437 | {"OverParenthesis;", '\U000023DC', 0}, 438 | {"PartialD;", '\U00002202', 0}, 439 | {"Pcy;", '\U0000041F', 0}, 440 | {"Pfr;", '\U0001D513', 0}, 441 | {"Phi;", '\U000003A6', 0}, 442 | {"Pi;", '\U000003A0', 0}, 443 | {"PlusMinus;", '\U000000B1', 0}, 444 | {"Poincareplane;", '\U0000210C', 0}, 445 | {"Popf;", '\U00002119', 0}, 446 | {"Pr;", '\U00002ABB', 0}, 447 | {"Precedes;", '\U0000227A', 0}, 448 | {"PrecedesEqual;", '\U00002AAF', 0}, 449 | {"PrecedesSlantEqual;", '\U0000227C', 0}, 450 | {"PrecedesTilde;", '\U0000227E', 0}, 451 | {"Prime;", '\U00002033', 0}, 452 | {"Product;", '\U0000220F', 0}, 453 | {"Proportion;", '\U00002237', 0}, 454 | {"Proportional;", '\U0000221D', 0}, 455 | {"Pscr;", '\U0001D4AB', 0}, 456 | {"Psi;", '\U000003A8', 0}, 457 | {"QUOT", '\U00000022', 0}, 458 | {"QUOT;", '\U00000022', 0}, 459 | {"Qfr;", '\U0001D514', 0}, 460 | {"Qopf;", '\U0000211A', 0}, 
461 | {"Qscr;", '\U0001D4AC', 0}, 462 | {"RBarr;", '\U00002910', 0}, 463 | {"REG", '\U000000AE', 0}, 464 | {"REG;", '\U000000AE', 0}, 465 | {"Racute;", '\U00000154', 0}, 466 | {"Rang;", '\U000027EB', 0}, 467 | {"Rarr;", '\U000021A0', 0}, 468 | {"Rarrtl;", '\U00002916', 0}, 469 | {"Rcaron;", '\U00000158', 0}, 470 | {"Rcedil;", '\U00000156', 0}, 471 | {"Rcy;", '\U00000420', 0}, 472 | {"Re;", '\U0000211C', 0}, 473 | {"ReverseElement;", '\U0000220B', 0}, 474 | {"ReverseEquilibrium;", '\U000021CB', 0}, 475 | {"ReverseUpEquilibrium;", '\U0000296F', 0}, 476 | {"Rfr;", '\U0000211C', 0}, 477 | {"Rho;", '\U000003A1', 0}, 478 | {"RightAngleBracket;", '\U000027E9', 0}, 479 | {"RightArrow;", '\U00002192', 0}, 480 | {"RightArrowBar;", '\U000021E5', 0}, 481 | {"RightArrowLeftArrow;", '\U000021C4', 0}, 482 | {"RightCeiling;", '\U00002309', 0}, 483 | {"RightDoubleBracket;", '\U000027E7', 0}, 484 | {"RightDownTeeVector;", '\U0000295D', 0}, 485 | {"RightDownVector;", '\U000021C2', 0}, 486 | {"RightDownVectorBar;", '\U00002955', 0}, 487 | {"RightFloor;", '\U0000230B', 0}, 488 | {"RightTee;", '\U000022A2', 0}, 489 | {"RightTeeArrow;", '\U000021A6', 0}, 490 | {"RightTeeVector;", '\U0000295B', 0}, 491 | {"RightTriangle;", '\U000022B3', 0}, 492 | {"RightTriangleBar;", '\U000029D0', 0}, 493 | {"RightTriangleEqual;", '\U000022B5', 0}, 494 | {"RightUpDownVector;", '\U0000294F', 0}, 495 | {"RightUpTeeVector;", '\U0000295C', 0}, 496 | {"RightUpVector;", '\U000021BE', 0}, 497 | {"RightUpVectorBar;", '\U00002954', 0}, 498 | {"RightVector;", '\U000021C0', 0}, 499 | {"RightVectorBar;", '\U00002953', 0}, 500 | {"Rightarrow;", '\U000021D2', 0}, 501 | {"Ropf;", '\U0000211D', 0}, 502 | {"RoundImplies;", '\U00002970', 0}, 503 | {"Rrightarrow;", '\U000021DB', 0}, 504 | {"Rscr;", '\U0000211B', 0}, 505 | {"Rsh;", '\U000021B1', 0}, 506 | {"RuleDelayed;", '\U000029F4', 0}, 507 | {"SHCHcy;", '\U00000429', 0}, 508 | {"SHcy;", '\U00000428', 0}, 509 | {"SOFTcy;", '\U0000042C', 0}, 510 | {"Sacute;", 
'\U0000015A', 0}, 511 | {"Sc;", '\U00002ABC', 0}, 512 | {"Scaron;", '\U00000160', 0}, 513 | {"Scedil;", '\U0000015E', 0}, 514 | {"Scirc;", '\U0000015C', 0}, 515 | {"Scy;", '\U00000421', 0}, 516 | {"Sfr;", '\U0001D516', 0}, 517 | {"ShortDownArrow;", '\U00002193', 0}, 518 | {"ShortLeftArrow;", '\U00002190', 0}, 519 | {"ShortRightArrow;", '\U00002192', 0}, 520 | {"ShortUpArrow;", '\U00002191', 0}, 521 | {"Sigma;", '\U000003A3', 0}, 522 | {"SmallCircle;", '\U00002218', 0}, 523 | {"Sopf;", '\U0001D54A', 0}, 524 | {"Sqrt;", '\U0000221A', 0}, 525 | {"Square;", '\U000025A1', 0}, 526 | {"SquareIntersection;", '\U00002293', 0}, 527 | {"SquareSubset;", '\U0000228F', 0}, 528 | {"SquareSubsetEqual;", '\U00002291', 0}, 529 | {"SquareSuperset;", '\U00002290', 0}, 530 | {"SquareSupersetEqual;", '\U00002292', 0}, 531 | {"SquareUnion;", '\U00002294', 0}, 532 | {"Sscr;", '\U0001D4AE', 0}, 533 | {"Star;", '\U000022C6', 0}, 534 | {"Sub;", '\U000022D0', 0}, 535 | {"Subset;", '\U000022D0', 0}, 536 | {"SubsetEqual;", '\U00002286', 0}, 537 | {"Succeeds;", '\U0000227B', 0}, 538 | {"SucceedsEqual;", '\U00002AB0', 0}, 539 | {"SucceedsSlantEqual;", '\U0000227D', 0}, 540 | {"SucceedsTilde;", '\U0000227F', 0}, 541 | {"SuchThat;", '\U0000220B', 0}, 542 | {"Sum;", '\U00002211', 0}, 543 | {"Sup;", '\U000022D1', 0}, 544 | {"Superset;", '\U00002283', 0}, 545 | {"SupersetEqual;", '\U00002287', 0}, 546 | {"Supset;", '\U000022D1', 0}, 547 | {"THORN", '\U000000DE', 0}, 548 | {"THORN;", '\U000000DE', 0}, 549 | {"TRADE;", '\U00002122', 0}, 550 | {"TSHcy;", '\U0000040B', 0}, 551 | {"TScy;", '\U00000426', 0}, 552 | {"Tab;", '\U00000009', 0}, 553 | {"Tau;", '\U000003A4', 0}, 554 | {"Tcaron;", '\U00000164', 0}, 555 | {"Tcedil;", '\U00000162', 0}, 556 | {"Tcy;", '\U00000422', 0}, 557 | {"Tfr;", '\U0001D517', 0}, 558 | {"Therefore;", '\U00002234', 0}, 559 | {"Theta;", '\U00000398', 0}, 560 | {"ThickSpace;", '\u205F', '\u200A'}, 561 | {"ThinSpace;", '\U00002009', 0}, 562 | {"Tilde;", '\U0000223C', 0}, 563 | 
{"TildeEqual;", '\U00002243', 0}, 564 | {"TildeFullEqual;", '\U00002245', 0}, 565 | {"TildeTilde;", '\U00002248', 0}, 566 | {"Topf;", '\U0001D54B', 0}, 567 | {"TripleDot;", '\U000020DB', 0}, 568 | {"Tscr;", '\U0001D4AF', 0}, 569 | {"Tstrok;", '\U00000166', 0}, 570 | {"Uacute", '\U000000DA', 0}, 571 | {"Uacute;", '\U000000DA', 0}, 572 | {"Uarr;", '\U0000219F', 0}, 573 | {"Uarrocir;", '\U00002949', 0}, 574 | {"Ubrcy;", '\U0000040E', 0}, 575 | {"Ubreve;", '\U0000016C', 0}, 576 | {"Ucirc", '\U000000DB', 0}, 577 | {"Ucirc;", '\U000000DB', 0}, 578 | {"Ucy;", '\U00000423', 0}, 579 | {"Udblac;", '\U00000170', 0}, 580 | {"Ufr;", '\U0001D518', 0}, 581 | {"Ugrave", '\U000000D9', 0}, 582 | {"Ugrave;", '\U000000D9', 0}, 583 | {"Umacr;", '\U0000016A', 0}, 584 | {"UnderBar;", '\U0000005F', 0}, 585 | {"UnderBrace;", '\U000023DF', 0}, 586 | {"UnderBracket;", '\U000023B5', 0}, 587 | {"UnderParenthesis;", '\U000023DD', 0}, 588 | {"Union;", '\U000022C3', 0}, 589 | {"UnionPlus;", '\U0000228E', 0}, 590 | {"Uogon;", '\U00000172', 0}, 591 | {"Uopf;", '\U0001D54C', 0}, 592 | {"UpArrow;", '\U00002191', 0}, 593 | {"UpArrowBar;", '\U00002912', 0}, 594 | {"UpArrowDownArrow;", '\U000021C5', 0}, 595 | {"UpDownArrow;", '\U00002195', 0}, 596 | {"UpEquilibrium;", '\U0000296E', 0}, 597 | {"UpTee;", '\U000022A5', 0}, 598 | {"UpTeeArrow;", '\U000021A5', 0}, 599 | {"Uparrow;", '\U000021D1', 0}, 600 | {"Updownarrow;", '\U000021D5', 0}, 601 | {"UpperLeftArrow;", '\U00002196', 0}, 602 | {"UpperRightArrow;", '\U00002197', 0}, 603 | {"Upsi;", '\U000003D2', 0}, 604 | {"Upsilon;", '\U000003A5', 0}, 605 | {"Uring;", '\U0000016E', 0}, 606 | {"Uscr;", '\U0001D4B0', 0}, 607 | {"Utilde;", '\U00000168', 0}, 608 | {"Uuml", '\U000000DC', 0}, 609 | {"Uuml;", '\U000000DC', 0}, 610 | {"VDash;", '\U000022AB', 0}, 611 | {"Vbar;", '\U00002AEB', 0}, 612 | {"Vcy;", '\U00000412', 0}, 613 | {"Vdash;", '\U000022A9', 0}, 614 | {"Vdashl;", '\U00002AE6', 0}, 615 | {"Vee;", '\U000022C1', 0}, 616 | {"Verbar;", '\U00002016', 0}, 617 
| {"Vert;", '\U00002016', 0}, 618 | {"VerticalBar;", '\U00002223', 0}, 619 | {"VerticalLine;", '\U0000007C', 0}, 620 | {"VerticalSeparator;", '\U00002758', 0}, 621 | {"VerticalTilde;", '\U00002240', 0}, 622 | {"VeryThinSpace;", '\U0000200A', 0}, 623 | {"Vfr;", '\U0001D519', 0}, 624 | {"Vopf;", '\U0001D54D', 0}, 625 | {"Vscr;", '\U0001D4B1', 0}, 626 | {"Vvdash;", '\U000022AA', 0}, 627 | {"Wcirc;", '\U00000174', 0}, 628 | {"Wedge;", '\U000022C0', 0}, 629 | {"Wfr;", '\U0001D51A', 0}, 630 | {"Wopf;", '\U0001D54E', 0}, 631 | {"Wscr;", '\U0001D4B2', 0}, 632 | {"Xfr;", '\U0001D51B', 0}, 633 | {"Xi;", '\U0000039E', 0}, 634 | {"Xopf;", '\U0001D54F', 0}, 635 | {"Xscr;", '\U0001D4B3', 0}, 636 | {"YAcy;", '\U0000042F', 0}, 637 | {"YIcy;", '\U00000407', 0}, 638 | {"YUcy;", '\U0000042E', 0}, 639 | {"Yacute", '\U000000DD', 0}, 640 | {"Yacute;", '\U000000DD', 0}, 641 | {"Ycirc;", '\U00000176', 0}, 642 | {"Ycy;", '\U0000042B', 0}, 643 | {"Yfr;", '\U0001D51C', 0}, 644 | {"Yopf;", '\U0001D550', 0}, 645 | {"Yscr;", '\U0001D4B4', 0}, 646 | {"Yuml;", '\U00000178', 0}, 647 | {"ZHcy;", '\U00000416', 0}, 648 | {"Zacute;", '\U00000179', 0}, 649 | {"Zcaron;", '\U0000017D', 0}, 650 | {"Zcy;", '\U00000417', 0}, 651 | {"Zdot;", '\U0000017B', 0}, 652 | {"ZeroWidthSpace;", '\U0000200B', 0}, 653 | {"Zeta;", '\U00000396', 0}, 654 | {"Zfr;", '\U00002128', 0}, 655 | {"Zopf;", '\U00002124', 0}, 656 | {"Zscr;", '\U0001D4B5', 0}, 657 | {"aacute", '\U000000E1', 0}, 658 | {"aacute;", '\U000000E1', 0}, 659 | {"abreve;", '\U00000103', 0}, 660 | {"ac;", '\U0000223E', 0}, 661 | {"acE;", '\u223E', '\u0333'}, 662 | {"acd;", '\U0000223F', 0}, 663 | {"acirc", '\U000000E2', 0}, 664 | {"acirc;", '\U000000E2', 0}, 665 | {"acute", '\U000000B4', 0}, 666 | {"acute;", '\U000000B4', 0}, 667 | {"acy;", '\U00000430', 0}, 668 | {"aelig", '\U000000E6', 0}, 669 | {"aelig;", '\U000000E6', 0}, 670 | {"af;", '\U00002061', 0}, 671 | {"afr;", '\U0001D51E', 0}, 672 | {"agrave", '\U000000E0', 0}, 673 | {"agrave;", '\U000000E0', 0}, 
674 | {"alefsym;", '\U00002135', 0}, 675 | {"aleph;", '\U00002135', 0}, 676 | {"alpha;", '\U000003B1', 0}, 677 | {"amacr;", '\U00000101', 0}, 678 | {"amalg;", '\U00002A3F', 0}, 679 | {"amp", '\U00000026', 0}, 680 | {"amp;", '\U00000026', 0}, 681 | {"and;", '\U00002227', 0}, 682 | {"andand;", '\U00002A55', 0}, 683 | {"andd;", '\U00002A5C', 0}, 684 | {"andslope;", '\U00002A58', 0}, 685 | {"andv;", '\U00002A5A', 0}, 686 | {"ang;", '\U00002220', 0}, 687 | {"ange;", '\U000029A4', 0}, 688 | {"angle;", '\U00002220', 0}, 689 | {"angmsd;", '\U00002221', 0}, 690 | {"angmsdaa;", '\U000029A8', 0}, 691 | {"angmsdab;", '\U000029A9', 0}, 692 | {"angmsdac;", '\U000029AA', 0}, 693 | {"angmsdad;", '\U000029AB', 0}, 694 | {"angmsdae;", '\U000029AC', 0}, 695 | {"angmsdaf;", '\U000029AD', 0}, 696 | {"angmsdag;", '\U000029AE', 0}, 697 | {"angmsdah;", '\U000029AF', 0}, 698 | {"angrt;", '\U0000221F', 0}, 699 | {"angrtvb;", '\U000022BE', 0}, 700 | {"angrtvbd;", '\U0000299D', 0}, 701 | {"angsph;", '\U00002222', 0}, 702 | {"angst;", '\U000000C5', 0}, 703 | {"angzarr;", '\U0000237C', 0}, 704 | {"aogon;", '\U00000105', 0}, 705 | {"aopf;", '\U0001D552', 0}, 706 | {"ap;", '\U00002248', 0}, 707 | {"apE;", '\U00002A70', 0}, 708 | {"apacir;", '\U00002A6F', 0}, 709 | {"ape;", '\U0000224A', 0}, 710 | {"apid;", '\U0000224B', 0}, 711 | {"apos;", '\U00000027', 0}, 712 | {"approx;", '\U00002248', 0}, 713 | {"approxeq;", '\U0000224A', 0}, 714 | {"aring", '\U000000E5', 0}, 715 | {"aring;", '\U000000E5', 0}, 716 | {"ascr;", '\U0001D4B6', 0}, 717 | {"ast;", '\U0000002A', 0}, 718 | {"asymp;", '\U00002248', 0}, 719 | {"asympeq;", '\U0000224D', 0}, 720 | {"atilde", '\U000000E3', 0}, 721 | {"atilde;", '\U000000E3', 0}, 722 | {"auml", '\U000000E4', 0}, 723 | {"auml;", '\U000000E4', 0}, 724 | {"awconint;", '\U00002233', 0}, 725 | {"awint;", '\U00002A11', 0}, 726 | {"bNot;", '\U00002AED', 0}, 727 | {"backcong;", '\U0000224C', 0}, 728 | {"backepsilon;", '\U000003F6', 0}, 729 | {"backprime;", '\U00002035', 0}, 730 | 
{"backsim;", '\U0000223D', 0}, 731 | {"backsimeq;", '\U000022CD', 0}, 732 | {"barvee;", '\U000022BD', 0}, 733 | {"barwed;", '\U00002305', 0}, 734 | {"barwedge;", '\U00002305', 0}, 735 | {"bbrk;", '\U000023B5', 0}, 736 | {"bbrktbrk;", '\U000023B6', 0}, 737 | {"bcong;", '\U0000224C', 0}, 738 | {"bcy;", '\U00000431', 0}, 739 | {"bdquo;", '\U0000201E', 0}, 740 | {"becaus;", '\U00002235', 0}, 741 | {"because;", '\U00002235', 0}, 742 | {"bemptyv;", '\U000029B0', 0}, 743 | {"bepsi;", '\U000003F6', 0}, 744 | {"bernou;", '\U0000212C', 0}, 745 | {"beta;", '\U000003B2', 0}, 746 | {"beth;", '\U00002136', 0}, 747 | {"between;", '\U0000226C', 0}, 748 | {"bfr;", '\U0001D51F', 0}, 749 | {"bigcap;", '\U000022C2', 0}, 750 | {"bigcirc;", '\U000025EF', 0}, 751 | {"bigcup;", '\U000022C3', 0}, 752 | {"bigodot;", '\U00002A00', 0}, 753 | {"bigoplus;", '\U00002A01', 0}, 754 | {"bigotimes;", '\U00002A02', 0}, 755 | {"bigsqcup;", '\U00002A06', 0}, 756 | {"bigstar;", '\U00002605', 0}, 757 | {"bigtriangledown;", '\U000025BD', 0}, 758 | {"bigtriangleup;", '\U000025B3', 0}, 759 | {"biguplus;", '\U00002A04', 0}, 760 | {"bigvee;", '\U000022C1', 0}, 761 | {"bigwedge;", '\U000022C0', 0}, 762 | {"bkarow;", '\U0000290D', 0}, 763 | {"blacklozenge;", '\U000029EB', 0}, 764 | {"blacksquare;", '\U000025AA', 0}, 765 | {"blacktriangle;", '\U000025B4', 0}, 766 | {"blacktriangledown;", '\U000025BE', 0}, 767 | {"blacktriangleleft;", '\U000025C2', 0}, 768 | {"blacktriangleright;", '\U000025B8', 0}, 769 | {"blank;", '\U00002423', 0}, 770 | {"blk12;", '\U00002592', 0}, 771 | {"blk14;", '\U00002591', 0}, 772 | {"blk34;", '\U00002593', 0}, 773 | {"block;", '\U00002588', 0}, 774 | {"bne;", '\u003D', '\u20E5'}, 775 | {"bnequiv;", '\u2261', '\u20E5'}, 776 | {"bnot;", '\U00002310', 0}, 777 | {"bopf;", '\U0001D553', 0}, 778 | {"bot;", '\U000022A5', 0}, 779 | {"bottom;", '\U000022A5', 0}, 780 | {"bowtie;", '\U000022C8', 0}, 781 | {"boxDL;", '\U00002557', 0}, 782 | {"boxDR;", '\U00002554', 0}, 783 | {"boxDl;", 
'\U00002556', 0}, 784 | {"boxDr;", '\U00002553', 0}, 785 | {"boxH;", '\U00002550', 0}, 786 | {"boxHD;", '\U00002566', 0}, 787 | {"boxHU;", '\U00002569', 0}, 788 | {"boxHd;", '\U00002564', 0}, 789 | {"boxHu;", '\U00002567', 0}, 790 | {"boxUL;", '\U0000255D', 0}, 791 | {"boxUR;", '\U0000255A', 0}, 792 | {"boxUl;", '\U0000255C', 0}, 793 | {"boxUr;", '\U00002559', 0}, 794 | {"boxV;", '\U00002551', 0}, 795 | {"boxVH;", '\U0000256C', 0}, 796 | {"boxVL;", '\U00002563', 0}, 797 | {"boxVR;", '\U00002560', 0}, 798 | {"boxVh;", '\U0000256B', 0}, 799 | {"boxVl;", '\U00002562', 0}, 800 | {"boxVr;", '\U0000255F', 0}, 801 | {"boxbox;", '\U000029C9', 0}, 802 | {"boxdL;", '\U00002555', 0}, 803 | {"boxdR;", '\U00002552', 0}, 804 | {"boxdl;", '\U00002510', 0}, 805 | {"boxdr;", '\U0000250C', 0}, 806 | {"boxh;", '\U00002500', 0}, 807 | {"boxhD;", '\U00002565', 0}, 808 | {"boxhU;", '\U00002568', 0}, 809 | {"boxhd;", '\U0000252C', 0}, 810 | {"boxhu;", '\U00002534', 0}, 811 | {"boxminus;", '\U0000229F', 0}, 812 | {"boxplus;", '\U0000229E', 0}, 813 | {"boxtimes;", '\U000022A0', 0}, 814 | {"boxuL;", '\U0000255B', 0}, 815 | {"boxuR;", '\U00002558', 0}, 816 | {"boxul;", '\U00002518', 0}, 817 | {"boxur;", '\U00002514', 0}, 818 | {"boxv;", '\U00002502', 0}, 819 | {"boxvH;", '\U0000256A', 0}, 820 | {"boxvL;", '\U00002561', 0}, 821 | {"boxvR;", '\U0000255E', 0}, 822 | {"boxvh;", '\U0000253C', 0}, 823 | {"boxvl;", '\U00002524', 0}, 824 | {"boxvr;", '\U0000251C', 0}, 825 | {"bprime;", '\U00002035', 0}, 826 | {"breve;", '\U000002D8', 0}, 827 | {"brvbar", '\U000000A6', 0}, 828 | {"brvbar;", '\U000000A6', 0}, 829 | {"bscr;", '\U0001D4B7', 0}, 830 | {"bsemi;", '\U0000204F', 0}, 831 | {"bsim;", '\U0000223D', 0}, 832 | {"bsime;", '\U000022CD', 0}, 833 | {"bsol;", '\U0000005C', 0}, 834 | {"bsolb;", '\U000029C5', 0}, 835 | {"bsolhsub;", '\U000027C8', 0}, 836 | {"bull;", '\U00002022', 0}, 837 | {"bullet;", '\U00002022', 0}, 838 | {"bump;", '\U0000224E', 0}, 839 | {"bumpE;", '\U00002AAE', 0}, 840 | 
{"bumpe;", '\U0000224F', 0}, 841 | {"bumpeq;", '\U0000224F', 0}, 842 | {"cacute;", '\U00000107', 0}, 843 | {"cap;", '\U00002229', 0}, 844 | {"capand;", '\U00002A44', 0}, 845 | {"capbrcup;", '\U00002A49', 0}, 846 | {"capcap;", '\U00002A4B', 0}, 847 | {"capcup;", '\U00002A47', 0}, 848 | {"capdot;", '\U00002A40', 0}, 849 | {"caps;", '\u2229', '\uFE00'}, 850 | {"caret;", '\U00002041', 0}, 851 | {"caron;", '\U000002C7', 0}, 852 | {"ccaps;", '\U00002A4D', 0}, 853 | {"ccaron;", '\U0000010D', 0}, 854 | {"ccedil", '\U000000E7', 0}, 855 | {"ccedil;", '\U000000E7', 0}, 856 | {"ccirc;", '\U00000109', 0}, 857 | {"ccups;", '\U00002A4C', 0}, 858 | {"ccupssm;", '\U00002A50', 0}, 859 | {"cdot;", '\U0000010B', 0}, 860 | {"cedil", '\U000000B8', 0}, 861 | {"cedil;", '\U000000B8', 0}, 862 | {"cemptyv;", '\U000029B2', 0}, 863 | {"cent", '\U000000A2', 0}, 864 | {"cent;", '\U000000A2', 0}, 865 | {"centerdot;", '\U000000B7', 0}, 866 | {"cfr;", '\U0001D520', 0}, 867 | {"chcy;", '\U00000447', 0}, 868 | {"check;", '\U00002713', 0}, 869 | {"checkmark;", '\U00002713', 0}, 870 | {"chi;", '\U000003C7', 0}, 871 | {"cir;", '\U000025CB', 0}, 872 | {"cirE;", '\U000029C3', 0}, 873 | {"circ;", '\U000002C6', 0}, 874 | {"circeq;", '\U00002257', 0}, 875 | {"circlearrowleft;", '\U000021BA', 0}, 876 | {"circlearrowright;", '\U000021BB', 0}, 877 | {"circledR;", '\U000000AE', 0}, 878 | {"circledS;", '\U000024C8', 0}, 879 | {"circledast;", '\U0000229B', 0}, 880 | {"circledcirc;", '\U0000229A', 0}, 881 | {"circleddash;", '\U0000229D', 0}, 882 | {"cire;", '\U00002257', 0}, 883 | {"cirfnint;", '\U00002A10', 0}, 884 | {"cirmid;", '\U00002AEF', 0}, 885 | {"cirscir;", '\U000029C2', 0}, 886 | {"clubs;", '\U00002663', 0}, 887 | {"clubsuit;", '\U00002663', 0}, 888 | {"colon;", '\U0000003A', 0}, 889 | {"colone;", '\U00002254', 0}, 890 | {"coloneq;", '\U00002254', 0}, 891 | {"comma;", '\U0000002C', 0}, 892 | {"commat;", '\U00000040', 0}, 893 | {"comp;", '\U00002201', 0}, 894 | {"compfn;", '\U00002218', 0}, 895 | 
{"complement;", '\U00002201', 0}, 896 | {"complexes;", '\U00002102', 0}, 897 | {"cong;", '\U00002245', 0}, 898 | {"congdot;", '\U00002A6D', 0}, 899 | {"conint;", '\U0000222E', 0}, 900 | {"copf;", '\U0001D554', 0}, 901 | {"coprod;", '\U00002210', 0}, 902 | {"copy", '\U000000A9', 0}, 903 | {"copy;", '\U000000A9', 0}, 904 | {"copysr;", '\U00002117', 0}, 905 | {"crarr;", '\U000021B5', 0}, 906 | {"cross;", '\U00002717', 0}, 907 | {"cscr;", '\U0001D4B8', 0}, 908 | {"csub;", '\U00002ACF', 0}, 909 | {"csube;", '\U00002AD1', 0}, 910 | {"csup;", '\U00002AD0', 0}, 911 | {"csupe;", '\U00002AD2', 0}, 912 | {"ctdot;", '\U000022EF', 0}, 913 | {"cudarrl;", '\U00002938', 0}, 914 | {"cudarrr;", '\U00002935', 0}, 915 | {"cuepr;", '\U000022DE', 0}, 916 | {"cuesc;", '\U000022DF', 0}, 917 | {"cularr;", '\U000021B6', 0}, 918 | {"cularrp;", '\U0000293D', 0}, 919 | {"cup;", '\U0000222A', 0}, 920 | {"cupbrcap;", '\U00002A48', 0}, 921 | {"cupcap;", '\U00002A46', 0}, 922 | {"cupcup;", '\U00002A4A', 0}, 923 | {"cupdot;", '\U0000228D', 0}, 924 | {"cupor;", '\U00002A45', 0}, 925 | {"cups;", '\u222A', '\uFE00'}, 926 | {"curarr;", '\U000021B7', 0}, 927 | {"curarrm;", '\U0000293C', 0}, 928 | {"curlyeqprec;", '\U000022DE', 0}, 929 | {"curlyeqsucc;", '\U000022DF', 0}, 930 | {"curlyvee;", '\U000022CE', 0}, 931 | {"curlywedge;", '\U000022CF', 0}, 932 | {"curren", '\U000000A4', 0}, 933 | {"curren;", '\U000000A4', 0}, 934 | {"curvearrowleft;", '\U000021B6', 0}, 935 | {"curvearrowright;", '\U000021B7', 0}, 936 | {"cuvee;", '\U000022CE', 0}, 937 | {"cuwed;", '\U000022CF', 0}, 938 | {"cwconint;", '\U00002232', 0}, 939 | {"cwint;", '\U00002231', 0}, 940 | {"cylcty;", '\U0000232D', 0}, 941 | {"dArr;", '\U000021D3', 0}, 942 | {"dHar;", '\U00002965', 0}, 943 | {"dagger;", '\U00002020', 0}, 944 | {"daleth;", '\U00002138', 0}, 945 | {"darr;", '\U00002193', 0}, 946 | {"dash;", '\U00002010', 0}, 947 | {"dashv;", '\U000022A3', 0}, 948 | {"dbkarow;", '\U0000290F', 0}, 949 | {"dblac;", '\U000002DD', 0}, 950 | 
{"dcaron;", '\U0000010F', 0}, 951 | {"dcy;", '\U00000434', 0}, 952 | {"dd;", '\U00002146', 0}, 953 | {"ddagger;", '\U00002021', 0}, 954 | {"ddarr;", '\U000021CA', 0}, 955 | {"ddotseq;", '\U00002A77', 0}, 956 | {"deg", '\U000000B0', 0}, 957 | {"deg;", '\U000000B0', 0}, 958 | {"delta;", '\U000003B4', 0}, 959 | {"demptyv;", '\U000029B1', 0}, 960 | {"dfisht;", '\U0000297F', 0}, 961 | {"dfr;", '\U0001D521', 0}, 962 | {"dharl;", '\U000021C3', 0}, 963 | {"dharr;", '\U000021C2', 0}, 964 | {"diam;", '\U000022C4', 0}, 965 | {"diamond;", '\U000022C4', 0}, 966 | {"diamondsuit;", '\U00002666', 0}, 967 | {"diams;", '\U00002666', 0}, 968 | {"die;", '\U000000A8', 0}, 969 | {"digamma;", '\U000003DD', 0}, 970 | {"disin;", '\U000022F2', 0}, 971 | {"div;", '\U000000F7', 0}, 972 | {"divide", '\U000000F7', 0}, 973 | {"divide;", '\U000000F7', 0}, 974 | {"divideontimes;", '\U000022C7', 0}, 975 | {"divonx;", '\U000022C7', 0}, 976 | {"djcy;", '\U00000452', 0}, 977 | {"dlcorn;", '\U0000231E', 0}, 978 | {"dlcrop;", '\U0000230D', 0}, 979 | {"dollar;", '\U00000024', 0}, 980 | {"dopf;", '\U0001D555', 0}, 981 | {"dot;", '\U000002D9', 0}, 982 | {"doteq;", '\U00002250', 0}, 983 | {"doteqdot;", '\U00002251', 0}, 984 | {"dotminus;", '\U00002238', 0}, 985 | {"dotplus;", '\U00002214', 0}, 986 | {"dotsquare;", '\U000022A1', 0}, 987 | {"doublebarwedge;", '\U00002306', 0}, 988 | {"downarrow;", '\U00002193', 0}, 989 | {"downdownarrows;", '\U000021CA', 0}, 990 | {"downharpoonleft;", '\U000021C3', 0}, 991 | {"downharpoonright;", '\U000021C2', 0}, 992 | {"drbkarow;", '\U00002910', 0}, 993 | {"drcorn;", '\U0000231F', 0}, 994 | {"drcrop;", '\U0000230C', 0}, 995 | {"dscr;", '\U0001D4B9', 0}, 996 | {"dscy;", '\U00000455', 0}, 997 | {"dsol;", '\U000029F6', 0}, 998 | {"dstrok;", '\U00000111', 0}, 999 | {"dtdot;", '\U000022F1', 0}, 1000 | {"dtri;", '\U000025BF', 0}, 1001 | {"dtrif;", '\U000025BE', 0}, 1002 | {"duarr;", '\U000021F5', 0}, 1003 | {"duhar;", '\U0000296F', 0}, 1004 | {"dwangle;", '\U000029A6', 0}, 1005 | 
{"dzcy;", '\U0000045F', 0}, 1006 | {"dzigrarr;", '\U000027FF', 0}, 1007 | {"eDDot;", '\U00002A77', 0}, 1008 | {"eDot;", '\U00002251', 0}, 1009 | {"eacute", '\U000000E9', 0}, 1010 | {"eacute;", '\U000000E9', 0}, 1011 | {"easter;", '\U00002A6E', 0}, 1012 | {"ecaron;", '\U0000011B', 0}, 1013 | {"ecir;", '\U00002256', 0}, 1014 | {"ecirc", '\U000000EA', 0}, 1015 | {"ecirc;", '\U000000EA', 0}, 1016 | {"ecolon;", '\U00002255', 0}, 1017 | {"ecy;", '\U0000044D', 0}, 1018 | {"edot;", '\U00000117', 0}, 1019 | {"ee;", '\U00002147', 0}, 1020 | {"efDot;", '\U00002252', 0}, 1021 | {"efr;", '\U0001D522', 0}, 1022 | {"eg;", '\U00002A9A', 0}, 1023 | {"egrave", '\U000000E8', 0}, 1024 | {"egrave;", '\U000000E8', 0}, 1025 | {"egs;", '\U00002A96', 0}, 1026 | {"egsdot;", '\U00002A98', 0}, 1027 | {"el;", '\U00002A99', 0}, 1028 | {"elinters;", '\U000023E7', 0}, 1029 | {"ell;", '\U00002113', 0}, 1030 | {"els;", '\U00002A95', 0}, 1031 | {"elsdot;", '\U00002A97', 0}, 1032 | {"emacr;", '\U00000113', 0}, 1033 | {"empty;", '\U00002205', 0}, 1034 | {"emptyset;", '\U00002205', 0}, 1035 | {"emptyv;", '\U00002205', 0}, 1036 | {"emsp13;", '\U00002004', 0}, 1037 | {"emsp14;", '\U00002005', 0}, 1038 | {"emsp;", '\U00002003', 0}, 1039 | {"eng;", '\U0000014B', 0}, 1040 | {"ensp;", '\U00002002', 0}, 1041 | {"eogon;", '\U00000119', 0}, 1042 | {"eopf;", '\U0001D556', 0}, 1043 | {"epar;", '\U000022D5', 0}, 1044 | {"eparsl;", '\U000029E3', 0}, 1045 | {"eplus;", '\U00002A71', 0}, 1046 | {"epsi;", '\U000003B5', 0}, 1047 | {"epsilon;", '\U000003B5', 0}, 1048 | {"epsiv;", '\U000003F5', 0}, 1049 | {"eqcirc;", '\U00002256', 0}, 1050 | {"eqcolon;", '\U00002255', 0}, 1051 | {"eqsim;", '\U00002242', 0}, 1052 | {"eqslantgtr;", '\U00002A96', 0}, 1053 | {"eqslantless;", '\U00002A95', 0}, 1054 | {"equals;", '\U0000003D', 0}, 1055 | {"equest;", '\U0000225F', 0}, 1056 | {"equiv;", '\U00002261', 0}, 1057 | {"equivDD;", '\U00002A78', 0}, 1058 | {"eqvparsl;", '\U000029E5', 0}, 1059 | {"erDot;", '\U00002253', 0}, 1060 | 
{"erarr;", '\U00002971', 0}, 1061 | {"escr;", '\U0000212F', 0}, 1062 | {"esdot;", '\U00002250', 0}, 1063 | {"esim;", '\U00002242', 0}, 1064 | {"eta;", '\U000003B7', 0}, 1065 | {"eth", '\U000000F0', 0}, 1066 | {"eth;", '\U000000F0', 0}, 1067 | {"euml", '\U000000EB', 0}, 1068 | {"euml;", '\U000000EB', 0}, 1069 | {"euro;", '\U000020AC', 0}, 1070 | {"excl;", '\U00000021', 0}, 1071 | {"exist;", '\U00002203', 0}, 1072 | {"expectation;", '\U00002130', 0}, 1073 | {"exponentiale;", '\U00002147', 0}, 1074 | {"fallingdotseq;", '\U00002252', 0}, 1075 | {"fcy;", '\U00000444', 0}, 1076 | {"female;", '\U00002640', 0}, 1077 | {"ffilig;", '\U0000FB03', 0}, 1078 | {"fflig;", '\U0000FB00', 0}, 1079 | {"ffllig;", '\U0000FB04', 0}, 1080 | {"ffr;", '\U0001D523', 0}, 1081 | {"filig;", '\U0000FB01', 0}, 1082 | {"fjlig;", '\u0066', '\u006A'}, 1083 | {"flat;", '\U0000266D', 0}, 1084 | {"fllig;", '\U0000FB02', 0}, 1085 | {"fltns;", '\U000025B1', 0}, 1086 | {"fnof;", '\U00000192', 0}, 1087 | {"fopf;", '\U0001D557', 0}, 1088 | {"forall;", '\U00002200', 0}, 1089 | {"fork;", '\U000022D4', 0}, 1090 | {"forkv;", '\U00002AD9', 0}, 1091 | {"fpartint;", '\U00002A0D', 0}, 1092 | {"frac12", '\U000000BD', 0}, 1093 | {"frac12;", '\U000000BD', 0}, 1094 | {"frac13;", '\U00002153', 0}, 1095 | {"frac14", '\U000000BC', 0}, 1096 | {"frac14;", '\U000000BC', 0}, 1097 | {"frac15;", '\U00002155', 0}, 1098 | {"frac16;", '\U00002159', 0}, 1099 | {"frac18;", '\U0000215B', 0}, 1100 | {"frac23;", '\U00002154', 0}, 1101 | {"frac25;", '\U00002156', 0}, 1102 | {"frac34", '\U000000BE', 0}, 1103 | {"frac34;", '\U000000BE', 0}, 1104 | {"frac35;", '\U00002157', 0}, 1105 | {"frac38;", '\U0000215C', 0}, 1106 | {"frac45;", '\U00002158', 0}, 1107 | {"frac56;", '\U0000215A', 0}, 1108 | {"frac58;", '\U0000215D', 0}, 1109 | {"frac78;", '\U0000215E', 0}, 1110 | {"frasl;", '\U00002044', 0}, 1111 | {"frown;", '\U00002322', 0}, 1112 | {"fscr;", '\U0001D4BB', 0}, 1113 | {"gE;", '\U00002267', 0}, 1114 | {"gEl;", '\U00002A8C', 0}, 1115 | 
{"gacute;", '\U000001F5', 0}, 1116 | {"gamma;", '\U000003B3', 0}, 1117 | {"gammad;", '\U000003DD', 0}, 1118 | {"gap;", '\U00002A86', 0}, 1119 | {"gbreve;", '\U0000011F', 0}, 1120 | {"gcirc;", '\U0000011D', 0}, 1121 | {"gcy;", '\U00000433', 0}, 1122 | {"gdot;", '\U00000121', 0}, 1123 | {"ge;", '\U00002265', 0}, 1124 | {"gel;", '\U000022DB', 0}, 1125 | {"geq;", '\U00002265', 0}, 1126 | {"geqq;", '\U00002267', 0}, 1127 | {"geqslant;", '\U00002A7E', 0}, 1128 | {"ges;", '\U00002A7E', 0}, 1129 | {"gescc;", '\U00002AA9', 0}, 1130 | {"gesdot;", '\U00002A80', 0}, 1131 | {"gesdoto;", '\U00002A82', 0}, 1132 | {"gesdotol;", '\U00002A84', 0}, 1133 | {"gesl;", '\u22DB', '\uFE00'}, 1134 | {"gesles;", '\U00002A94', 0}, 1135 | {"gfr;", '\U0001D524', 0}, 1136 | {"gg;", '\U0000226B', 0}, 1137 | {"ggg;", '\U000022D9', 0}, 1138 | {"gimel;", '\U00002137', 0}, 1139 | {"gjcy;", '\U00000453', 0}, 1140 | {"gl;", '\U00002277', 0}, 1141 | {"glE;", '\U00002A92', 0}, 1142 | {"gla;", '\U00002AA5', 0}, 1143 | {"glj;", '\U00002AA4', 0}, 1144 | {"gnE;", '\U00002269', 0}, 1145 | {"gnap;", '\U00002A8A', 0}, 1146 | {"gnapprox;", '\U00002A8A', 0}, 1147 | {"gne;", '\U00002A88', 0}, 1148 | {"gneq;", '\U00002A88', 0}, 1149 | {"gneqq;", '\U00002269', 0}, 1150 | {"gnsim;", '\U000022E7', 0}, 1151 | {"gopf;", '\U0001D558', 0}, 1152 | {"grave;", '\U00000060', 0}, 1153 | {"gscr;", '\U0000210A', 0}, 1154 | {"gsim;", '\U00002273', 0}, 1155 | {"gsime;", '\U00002A8E', 0}, 1156 | {"gsiml;", '\U00002A90', 0}, 1157 | {"gt", '\U0000003E', 0}, 1158 | {"gt;", '\U0000003E', 0}, 1159 | {"gtcc;", '\U00002AA7', 0}, 1160 | {"gtcir;", '\U00002A7A', 0}, 1161 | {"gtdot;", '\U000022D7', 0}, 1162 | {"gtlPar;", '\U00002995', 0}, 1163 | {"gtquest;", '\U00002A7C', 0}, 1164 | {"gtrapprox;", '\U00002A86', 0}, 1165 | {"gtrarr;", '\U00002978', 0}, 1166 | {"gtrdot;", '\U000022D7', 0}, 1167 | {"gtreqless;", '\U000022DB', 0}, 1168 | {"gtreqqless;", '\U00002A8C', 0}, 1169 | {"gtrless;", '\U00002277', 0}, 1170 | {"gtrsim;", '\U00002273', 0}, 
1171 | {"gvertneqq;", '\u2269', '\uFE00'}, 1172 | {"gvnE;", '\u2269', '\uFE00'}, 1173 | {"hArr;", '\U000021D4', 0}, 1174 | {"hairsp;", '\U0000200A', 0}, 1175 | {"half;", '\U000000BD', 0}, 1176 | {"hamilt;", '\U0000210B', 0}, 1177 | {"hardcy;", '\U0000044A', 0}, 1178 | {"harr;", '\U00002194', 0}, 1179 | {"harrcir;", '\U00002948', 0}, 1180 | {"harrw;", '\U000021AD', 0}, 1181 | {"hbar;", '\U0000210F', 0}, 1182 | {"hcirc;", '\U00000125', 0}, 1183 | {"hearts;", '\U00002665', 0}, 1184 | {"heartsuit;", '\U00002665', 0}, 1185 | {"hellip;", '\U00002026', 0}, 1186 | {"hercon;", '\U000022B9', 0}, 1187 | {"hfr;", '\U0001D525', 0}, 1188 | {"hksearow;", '\U00002925', 0}, 1189 | {"hkswarow;", '\U00002926', 0}, 1190 | {"hoarr;", '\U000021FF', 0}, 1191 | {"homtht;", '\U0000223B', 0}, 1192 | {"hookleftarrow;", '\U000021A9', 0}, 1193 | {"hookrightarrow;", '\U000021AA', 0}, 1194 | {"hopf;", '\U0001D559', 0}, 1195 | {"horbar;", '\U00002015', 0}, 1196 | {"hscr;", '\U0001D4BD', 0}, 1197 | {"hslash;", '\U0000210F', 0}, 1198 | {"hstrok;", '\U00000127', 0}, 1199 | {"hybull;", '\U00002043', 0}, 1200 | {"hyphen;", '\U00002010', 0}, 1201 | {"iacute", '\U000000ED', 0}, 1202 | {"iacute;", '\U000000ED', 0}, 1203 | {"ic;", '\U00002063', 0}, 1204 | {"icirc", '\U000000EE', 0}, 1205 | {"icirc;", '\U000000EE', 0}, 1206 | {"icy;", '\U00000438', 0}, 1207 | {"iecy;", '\U00000435', 0}, 1208 | {"iexcl", '\U000000A1', 0}, 1209 | {"iexcl;", '\U000000A1', 0}, 1210 | {"iff;", '\U000021D4', 0}, 1211 | {"ifr;", '\U0001D526', 0}, 1212 | {"igrave", '\U000000EC', 0}, 1213 | {"igrave;", '\U000000EC', 0}, 1214 | {"ii;", '\U00002148', 0}, 1215 | {"iiiint;", '\U00002A0C', 0}, 1216 | {"iiint;", '\U0000222D', 0}, 1217 | {"iinfin;", '\U000029DC', 0}, 1218 | {"iiota;", '\U00002129', 0}, 1219 | {"ijlig;", '\U00000133', 0}, 1220 | {"imacr;", '\U0000012B', 0}, 1221 | {"image;", '\U00002111', 0}, 1222 | {"imagline;", '\U00002110', 0}, 1223 | {"imagpart;", '\U00002111', 0}, 1224 | {"imath;", '\U00000131', 0}, 1225 | {"imof;", 
'\U000022B7', 0}, 1226 | {"imped;", '\U000001B5', 0}, 1227 | {"in;", '\U00002208', 0}, 1228 | {"incare;", '\U00002105', 0}, 1229 | {"infin;", '\U0000221E', 0}, 1230 | {"infintie;", '\U000029DD', 0}, 1231 | {"inodot;", '\U00000131', 0}, 1232 | {"int;", '\U0000222B', 0}, 1233 | {"intcal;", '\U000022BA', 0}, 1234 | {"integers;", '\U00002124', 0}, 1235 | {"intercal;", '\U000022BA', 0}, 1236 | {"intlarhk;", '\U00002A17', 0}, 1237 | {"intprod;", '\U00002A3C', 0}, 1238 | {"iocy;", '\U00000451', 0}, 1239 | {"iogon;", '\U0000012F', 0}, 1240 | {"iopf;", '\U0001D55A', 0}, 1241 | {"iota;", '\U000003B9', 0}, 1242 | {"iprod;", '\U00002A3C', 0}, 1243 | {"iquest", '\U000000BF', 0}, 1244 | {"iquest;", '\U000000BF', 0}, 1245 | {"iscr;", '\U0001D4BE', 0}, 1246 | {"isin;", '\U00002208', 0}, 1247 | {"isinE;", '\U000022F9', 0}, 1248 | {"isindot;", '\U000022F5', 0}, 1249 | {"isins;", '\U000022F4', 0}, 1250 | {"isinsv;", '\U000022F3', 0}, 1251 | {"isinv;", '\U00002208', 0}, 1252 | {"it;", '\U00002062', 0}, 1253 | {"itilde;", '\U00000129', 0}, 1254 | {"iukcy;", '\U00000456', 0}, 1255 | {"iuml", '\U000000EF', 0}, 1256 | {"iuml;", '\U000000EF', 0}, 1257 | {"jcirc;", '\U00000135', 0}, 1258 | {"jcy;", '\U00000439', 0}, 1259 | {"jfr;", '\U0001D527', 0}, 1260 | {"jmath;", '\U00000237', 0}, 1261 | {"jopf;", '\U0001D55B', 0}, 1262 | {"jscr;", '\U0001D4BF', 0}, 1263 | {"jsercy;", '\U00000458', 0}, 1264 | {"jukcy;", '\U00000454', 0}, 1265 | {"kappa;", '\U000003BA', 0}, 1266 | {"kappav;", '\U000003F0', 0}, 1267 | {"kcedil;", '\U00000137', 0}, 1268 | {"kcy;", '\U0000043A', 0}, 1269 | {"kfr;", '\U0001D528', 0}, 1270 | {"kgreen;", '\U00000138', 0}, 1271 | {"khcy;", '\U00000445', 0}, 1272 | {"kjcy;", '\U0000045C', 0}, 1273 | {"kopf;", '\U0001D55C', 0}, 1274 | {"kscr;", '\U0001D4C0', 0}, 1275 | {"lAarr;", '\U000021DA', 0}, 1276 | {"lArr;", '\U000021D0', 0}, 1277 | {"lAtail;", '\U0000291B', 0}, 1278 | {"lBarr;", '\U0000290E', 0}, 1279 | {"lE;", '\U00002266', 0}, 1280 | {"lEg;", '\U00002A8B', 0}, 1281 | 
{"lHar;", '\U00002962', 0}, 1282 | {"lacute;", '\U0000013A', 0}, 1283 | {"laemptyv;", '\U000029B4', 0}, 1284 | {"lagran;", '\U00002112', 0}, 1285 | {"lambda;", '\U000003BB', 0}, 1286 | {"lang;", '\U000027E8', 0}, 1287 | {"langd;", '\U00002991', 0}, 1288 | {"langle;", '\U000027E8', 0}, 1289 | {"lap;", '\U00002A85', 0}, 1290 | {"laquo", '\U000000AB', 0}, 1291 | {"laquo;", '\U000000AB', 0}, 1292 | {"larr;", '\U00002190', 0}, 1293 | {"larrb;", '\U000021E4', 0}, 1294 | {"larrbfs;", '\U0000291F', 0}, 1295 | {"larrfs;", '\U0000291D', 0}, 1296 | {"larrhk;", '\U000021A9', 0}, 1297 | {"larrlp;", '\U000021AB', 0}, 1298 | {"larrpl;", '\U00002939', 0}, 1299 | {"larrsim;", '\U00002973', 0}, 1300 | {"larrtl;", '\U000021A2', 0}, 1301 | {"lat;", '\U00002AAB', 0}, 1302 | {"latail;", '\U00002919', 0}, 1303 | {"late;", '\U00002AAD', 0}, 1304 | {"lates;", '\u2AAD', '\uFE00'}, 1305 | {"lbarr;", '\U0000290C', 0}, 1306 | {"lbbrk;", '\U00002772', 0}, 1307 | {"lbrace;", '\U0000007B', 0}, 1308 | {"lbrack;", '\U0000005B', 0}, 1309 | {"lbrke;", '\U0000298B', 0}, 1310 | {"lbrksld;", '\U0000298F', 0}, 1311 | {"lbrkslu;", '\U0000298D', 0}, 1312 | {"lcaron;", '\U0000013E', 0}, 1313 | {"lcedil;", '\U0000013C', 0}, 1314 | {"lceil;", '\U00002308', 0}, 1315 | {"lcub;", '\U0000007B', 0}, 1316 | {"lcy;", '\U0000043B', 0}, 1317 | {"ldca;", '\U00002936', 0}, 1318 | {"ldquo;", '\U0000201C', 0}, 1319 | {"ldquor;", '\U0000201E', 0}, 1320 | {"ldrdhar;", '\U00002967', 0}, 1321 | {"ldrushar;", '\U0000294B', 0}, 1322 | {"ldsh;", '\U000021B2', 0}, 1323 | {"le;", '\U00002264', 0}, 1324 | {"leftarrow;", '\U00002190', 0}, 1325 | {"leftarrowtail;", '\U000021A2', 0}, 1326 | {"leftharpoondown;", '\U000021BD', 0}, 1327 | {"leftharpoonup;", '\U000021BC', 0}, 1328 | {"leftleftarrows;", '\U000021C7', 0}, 1329 | {"leftrightarrow;", '\U00002194', 0}, 1330 | {"leftrightarrows;", '\U000021C6', 0}, 1331 | {"leftrightharpoons;", '\U000021CB', 0}, 1332 | {"leftrightsquigarrow;", '\U000021AD', 0}, 1333 | {"leftthreetimes;", 
'\U000022CB', 0}, 1334 | {"leg;", '\U000022DA', 0}, 1335 | {"leq;", '\U00002264', 0}, 1336 | {"leqq;", '\U00002266', 0}, 1337 | {"leqslant;", '\U00002A7D', 0}, 1338 | {"les;", '\U00002A7D', 0}, 1339 | {"lescc;", '\U00002AA8', 0}, 1340 | {"lesdot;", '\U00002A7F', 0}, 1341 | {"lesdoto;", '\U00002A81', 0}, 1342 | {"lesdotor;", '\U00002A83', 0}, 1343 | {"lesg;", '\u22DA', '\uFE00'}, 1344 | {"lesges;", '\U00002A93', 0}, 1345 | {"lessapprox;", '\U00002A85', 0}, 1346 | {"lessdot;", '\U000022D6', 0}, 1347 | {"lesseqgtr;", '\U000022DA', 0}, 1348 | {"lesseqqgtr;", '\U00002A8B', 0}, 1349 | {"lessgtr;", '\U00002276', 0}, 1350 | {"lesssim;", '\U00002272', 0}, 1351 | {"lfisht;", '\U0000297C', 0}, 1352 | {"lfloor;", '\U0000230A', 0}, 1353 | {"lfr;", '\U0001D529', 0}, 1354 | {"lg;", '\U00002276', 0}, 1355 | {"lgE;", '\U00002A91', 0}, 1356 | {"lhard;", '\U000021BD', 0}, 1357 | {"lharu;", '\U000021BC', 0}, 1358 | {"lharul;", '\U0000296A', 0}, 1359 | {"lhblk;", '\U00002584', 0}, 1360 | {"ljcy;", '\U00000459', 0}, 1361 | {"ll;", '\U0000226A', 0}, 1362 | {"llarr;", '\U000021C7', 0}, 1363 | {"llcorner;", '\U0000231E', 0}, 1364 | {"llhard;", '\U0000296B', 0}, 1365 | {"lltri;", '\U000025FA', 0}, 1366 | {"lmidot;", '\U00000140', 0}, 1367 | {"lmoust;", '\U000023B0', 0}, 1368 | {"lmoustache;", '\U000023B0', 0}, 1369 | {"lnE;", '\U00002268', 0}, 1370 | {"lnap;", '\U00002A89', 0}, 1371 | {"lnapprox;", '\U00002A89', 0}, 1372 | {"lne;", '\U00002A87', 0}, 1373 | {"lneq;", '\U00002A87', 0}, 1374 | {"lneqq;", '\U00002268', 0}, 1375 | {"lnsim;", '\U000022E6', 0}, 1376 | {"loang;", '\U000027EC', 0}, 1377 | {"loarr;", '\U000021FD', 0}, 1378 | {"lobrk;", '\U000027E6', 0}, 1379 | {"longleftarrow;", '\U000027F5', 0}, 1380 | {"longleftrightarrow;", '\U000027F7', 0}, 1381 | {"longmapsto;", '\U000027FC', 0}, 1382 | {"longrightarrow;", '\U000027F6', 0}, 1383 | {"looparrowleft;", '\U000021AB', 0}, 1384 | {"looparrowright;", '\U000021AC', 0}, 1385 | {"lopar;", '\U00002985', 0}, 1386 | {"lopf;", '\U0001D55D', 
0}, 1387 | {"loplus;", '\U00002A2D', 0}, 1388 | {"lotimes;", '\U00002A34', 0}, 1389 | {"lowast;", '\U00002217', 0}, 1390 | {"lowbar;", '\U0000005F', 0}, 1391 | {"loz;", '\U000025CA', 0}, 1392 | {"lozenge;", '\U000025CA', 0}, 1393 | {"lozf;", '\U000029EB', 0}, 1394 | {"lpar;", '\U00000028', 0}, 1395 | {"lparlt;", '\U00002993', 0}, 1396 | {"lrarr;", '\U000021C6', 0}, 1397 | {"lrcorner;", '\U0000231F', 0}, 1398 | {"lrhar;", '\U000021CB', 0}, 1399 | {"lrhard;", '\U0000296D', 0}, 1400 | {"lrm;", '\U0000200E', 0}, 1401 | {"lrtri;", '\U000022BF', 0}, 1402 | {"lsaquo;", '\U00002039', 0}, 1403 | {"lscr;", '\U0001D4C1', 0}, 1404 | {"lsh;", '\U000021B0', 0}, 1405 | {"lsim;", '\U00002272', 0}, 1406 | {"lsime;", '\U00002A8D', 0}, 1407 | {"lsimg;", '\U00002A8F', 0}, 1408 | {"lsqb;", '\U0000005B', 0}, 1409 | {"lsquo;", '\U00002018', 0}, 1410 | {"lsquor;", '\U0000201A', 0}, 1411 | {"lstrok;", '\U00000142', 0}, 1412 | {"lt", '\U0000003C', 0}, 1413 | {"lt;", '\U0000003C', 0}, 1414 | {"ltcc;", '\U00002AA6', 0}, 1415 | {"ltcir;", '\U00002A79', 0}, 1416 | {"ltdot;", '\U000022D6', 0}, 1417 | {"lthree;", '\U000022CB', 0}, 1418 | {"ltimes;", '\U000022C9', 0}, 1419 | {"ltlarr;", '\U00002976', 0}, 1420 | {"ltquest;", '\U00002A7B', 0}, 1421 | {"ltrPar;", '\U00002996', 0}, 1422 | {"ltri;", '\U000025C3', 0}, 1423 | {"ltrie;", '\U000022B4', 0}, 1424 | {"ltrif;", '\U000025C2', 0}, 1425 | {"lurdshar;", '\U0000294A', 0}, 1426 | {"luruhar;", '\U00002966', 0}, 1427 | {"lvertneqq;", '\u2268', '\uFE00'}, 1428 | {"lvnE;", '\u2268', '\uFE00'}, 1429 | {"mDDot;", '\U0000223A', 0}, 1430 | {"macr", '\U000000AF', 0}, 1431 | {"macr;", '\U000000AF', 0}, 1432 | {"male;", '\U00002642', 0}, 1433 | {"malt;", '\U00002720', 0}, 1434 | {"maltese;", '\U00002720', 0}, 1435 | {"map;", '\U000021A6', 0}, 1436 | {"mapsto;", '\U000021A6', 0}, 1437 | {"mapstodown;", '\U000021A7', 0}, 1438 | {"mapstoleft;", '\U000021A4', 0}, 1439 | {"mapstoup;", '\U000021A5', 0}, 1440 | {"marker;", '\U000025AE', 0}, 1441 | {"mcomma;", 
'\U00002A29', 0}, 1442 | {"mcy;", '\U0000043C', 0}, 1443 | {"mdash;", '\U00002014', 0}, 1444 | {"measuredangle;", '\U00002221', 0}, 1445 | {"mfr;", '\U0001D52A', 0}, 1446 | {"mho;", '\U00002127', 0}, 1447 | {"micro", '\U000000B5', 0}, 1448 | {"micro;", '\U000000B5', 0}, 1449 | {"mid;", '\U00002223', 0}, 1450 | {"midast;", '\U0000002A', 0}, 1451 | {"midcir;", '\U00002AF0', 0}, 1452 | {"middot", '\U000000B7', 0}, 1453 | {"middot;", '\U000000B7', 0}, 1454 | {"minus;", '\U00002212', 0}, 1455 | {"minusb;", '\U0000229F', 0}, 1456 | {"minusd;", '\U00002238', 0}, 1457 | {"minusdu;", '\U00002A2A', 0}, 1458 | {"mlcp;", '\U00002ADB', 0}, 1459 | {"mldr;", '\U00002026', 0}, 1460 | {"mnplus;", '\U00002213', 0}, 1461 | {"models;", '\U000022A7', 0}, 1462 | {"mopf;", '\U0001D55E', 0}, 1463 | {"mp;", '\U00002213', 0}, 1464 | {"mscr;", '\U0001D4C2', 0}, 1465 | {"mstpos;", '\U0000223E', 0}, 1466 | {"mu;", '\U000003BC', 0}, 1467 | {"multimap;", '\U000022B8', 0}, 1468 | {"mumap;", '\U000022B8', 0}, 1469 | {"nGg;", '\u22D9', '\u0338'}, 1470 | {"nGt;", '\u226B', '\u20D2'}, 1471 | {"nGtv;", '\u226B', '\u0338'}, 1472 | {"nLeftarrow;", '\U000021CD', 0}, 1473 | {"nLeftrightarrow;", '\U000021CE', 0}, 1474 | {"nLl;", '\u22D8', '\u0338'}, 1475 | {"nLt;", '\u226A', '\u20D2'}, 1476 | {"nLtv;", '\u226A', '\u0338'}, 1477 | {"nRightarrow;", '\U000021CF', 0}, 1478 | {"nVDash;", '\U000022AF', 0}, 1479 | {"nVdash;", '\U000022AE', 0}, 1480 | {"nabla;", '\U00002207', 0}, 1481 | {"nacute;", '\U00000144', 0}, 1482 | {"nang;", '\u2220', '\u20D2'}, 1483 | {"nap;", '\U00002249', 0}, 1484 | {"napE;", '\u2A70', '\u0338'}, 1485 | {"napid;", '\u224B', '\u0338'}, 1486 | {"napos;", '\U00000149', 0}, 1487 | {"napprox;", '\U00002249', 0}, 1488 | {"natur;", '\U0000266E', 0}, 1489 | {"natural;", '\U0000266E', 0}, 1490 | {"naturals;", '\U00002115', 0}, 1491 | {"nbsp", '\U000000A0', 0}, 1492 | {"nbsp;", '\U000000A0', 0}, 1493 | {"nbump;", '\u224E', '\u0338'}, 1494 | {"nbumpe;", '\u224F', '\u0338'}, 1495 | {"ncap;", 
'\U00002A43', 0}, 1496 | {"ncaron;", '\U00000148', 0}, 1497 | {"ncedil;", '\U00000146', 0}, 1498 | {"ncong;", '\U00002247', 0}, 1499 | {"ncongdot;", '\u2A6D', '\u0338'}, 1500 | {"ncup;", '\U00002A42', 0}, 1501 | {"ncy;", '\U0000043D', 0}, 1502 | {"ndash;", '\U00002013', 0}, 1503 | {"ne;", '\U00002260', 0}, 1504 | {"neArr;", '\U000021D7', 0}, 1505 | {"nearhk;", '\U00002924', 0}, 1506 | {"nearr;", '\U00002197', 0}, 1507 | {"nearrow;", '\U00002197', 0}, 1508 | {"nedot;", '\u2250', '\u0338'}, 1509 | {"nequiv;", '\U00002262', 0}, 1510 | {"nesear;", '\U00002928', 0}, 1511 | {"nesim;", '\u2242', '\u0338'}, 1512 | {"nexist;", '\U00002204', 0}, 1513 | {"nexists;", '\U00002204', 0}, 1514 | {"nfr;", '\U0001D52B', 0}, 1515 | {"ngE;", '\u2267', '\u0338'}, 1516 | {"nge;", '\U00002271', 0}, 1517 | {"ngeq;", '\U00002271', 0}, 1518 | {"ngeqq;", '\u2267', '\u0338'}, 1519 | {"ngeqslant;", '\u2A7E', '\u0338'}, 1520 | {"nges;", '\u2A7E', '\u0338'}, 1521 | {"ngsim;", '\U00002275', 0}, 1522 | {"ngt;", '\U0000226F', 0}, 1523 | {"ngtr;", '\U0000226F', 0}, 1524 | {"nhArr;", '\U000021CE', 0}, 1525 | {"nharr;", '\U000021AE', 0}, 1526 | {"nhpar;", '\U00002AF2', 0}, 1527 | {"ni;", '\U0000220B', 0}, 1528 | {"nis;", '\U000022FC', 0}, 1529 | {"nisd;", '\U000022FA', 0}, 1530 | {"niv;", '\U0000220B', 0}, 1531 | {"njcy;", '\U0000045A', 0}, 1532 | {"nlArr;", '\U000021CD', 0}, 1533 | {"nlE;", '\u2266', '\u0338'}, 1534 | {"nlarr;", '\U0000219A', 0}, 1535 | {"nldr;", '\U00002025', 0}, 1536 | {"nle;", '\U00002270', 0}, 1537 | {"nleftarrow;", '\U0000219A', 0}, 1538 | {"nleftrightarrow;", '\U000021AE', 0}, 1539 | {"nleq;", '\U00002270', 0}, 1540 | {"nleqq;", '\u2266', '\u0338'}, 1541 | {"nleqslant;", '\u2A7D', '\u0338'}, 1542 | {"nles;", '\u2A7D', '\u0338'}, 1543 | {"nless;", '\U0000226E', 0}, 1544 | {"nlsim;", '\U00002274', 0}, 1545 | {"nlt;", '\U0000226E', 0}, 1546 | {"nltri;", '\U000022EA', 0}, 1547 | {"nltrie;", '\U000022EC', 0}, 1548 | {"nmid;", '\U00002224', 0}, 1549 | {"nopf;", '\U0001D55F', 0}, 1550 
| {"not", '\U000000AC', 0}, 1551 | {"not;", '\U000000AC', 0}, 1552 | {"notin;", '\U00002209', 0}, 1553 | {"notinE;", '\u22F9', '\u0338'}, 1554 | {"notindot;", '\u22F5', '\u0338'}, 1555 | {"notinva;", '\U00002209', 0}, 1556 | {"notinvb;", '\U000022F7', 0}, 1557 | {"notinvc;", '\U000022F6', 0}, 1558 | {"notni;", '\U0000220C', 0}, 1559 | {"notniva;", '\U0000220C', 0}, 1560 | {"notnivb;", '\U000022FE', 0}, 1561 | {"notnivc;", '\U000022FD', 0}, 1562 | {"npar;", '\U00002226', 0}, 1563 | {"nparallel;", '\U00002226', 0}, 1564 | {"nparsl;", '\u2AFD', '\u20E5'}, 1565 | {"npart;", '\u2202', '\u0338'}, 1566 | {"npolint;", '\U00002A14', 0}, 1567 | {"npr;", '\U00002280', 0}, 1568 | {"nprcue;", '\U000022E0', 0}, 1569 | {"npre;", '\u2AAF', '\u0338'}, 1570 | {"nprec;", '\U00002280', 0}, 1571 | {"npreceq;", '\u2AAF', '\u0338'}, 1572 | {"nrArr;", '\U000021CF', 0}, 1573 | {"nrarr;", '\U0000219B', 0}, 1574 | {"nrarrc;", '\u2933', '\u0338'}, 1575 | {"nrarrw;", '\u219D', '\u0338'}, 1576 | {"nrightarrow;", '\U0000219B', 0}, 1577 | {"nrtri;", '\U000022EB', 0}, 1578 | {"nrtrie;", '\U000022ED', 0}, 1579 | {"nsc;", '\U00002281', 0}, 1580 | {"nsccue;", '\U000022E1', 0}, 1581 | {"nsce;", '\u2AB0', '\u0338'}, 1582 | {"nscr;", '\U0001D4C3', 0}, 1583 | {"nshortmid;", '\U00002224', 0}, 1584 | {"nshortparallel;", '\U00002226', 0}, 1585 | {"nsim;", '\U00002241', 0}, 1586 | {"nsime;", '\U00002244', 0}, 1587 | {"nsimeq;", '\U00002244', 0}, 1588 | {"nsmid;", '\U00002224', 0}, 1589 | {"nspar;", '\U00002226', 0}, 1590 | {"nsqsube;", '\U000022E2', 0}, 1591 | {"nsqsupe;", '\U000022E3', 0}, 1592 | {"nsub;", '\U00002284', 0}, 1593 | {"nsubE;", '\u2AC5', '\u0338'}, 1594 | {"nsube;", '\U00002288', 0}, 1595 | {"nsubset;", '\u2282', '\u20D2'}, 1596 | {"nsubseteq;", '\U00002288', 0}, 1597 | {"nsubseteqq;", '\u2AC5', '\u0338'}, 1598 | {"nsucc;", '\U00002281', 0}, 1599 | {"nsucceq;", '\u2AB0', '\u0338'}, 1600 | {"nsup;", '\U00002285', 0}, 1601 | {"nsupE;", '\u2AC6', '\u0338'}, 1602 | {"nsupe;", '\U00002289', 0}, 
1603 | {"nsupset;", '\u2283', '\u20D2'}, 1604 | {"nsupseteq;", '\U00002289', 0}, 1605 | {"nsupseteqq;", '\u2AC6', '\u0338'}, 1606 | {"ntgl;", '\U00002279', 0}, 1607 | {"ntilde", '\U000000F1', 0}, 1608 | {"ntilde;", '\U000000F1', 0}, 1609 | {"ntlg;", '\U00002278', 0}, 1610 | {"ntriangleleft;", '\U000022EA', 0}, 1611 | {"ntrianglelefteq;", '\U000022EC', 0}, 1612 | {"ntriangleright;", '\U000022EB', 0}, 1613 | {"ntrianglerighteq;", '\U000022ED', 0}, 1614 | {"nu;", '\U000003BD', 0}, 1615 | {"num;", '\U00000023', 0}, 1616 | {"numero;", '\U00002116', 0}, 1617 | {"numsp;", '\U00002007', 0}, 1618 | {"nvDash;", '\U000022AD', 0}, 1619 | {"nvHarr;", '\U00002904', 0}, 1620 | {"nvap;", '\u224D', '\u20D2'}, 1621 | {"nvdash;", '\U000022AC', 0}, 1622 | {"nvge;", '\u2265', '\u20D2'}, 1623 | {"nvgt;", '\u003E', '\u20D2'}, 1624 | {"nvinfin;", '\U000029DE', 0}, 1625 | {"nvlArr;", '\U00002902', 0}, 1626 | {"nvle;", '\u2264', '\u20D2'}, 1627 | {"nvlt;", '\u003C', '\u20D2'}, 1628 | {"nvltrie;", '\u22B4', '\u20D2'}, 1629 | {"nvrArr;", '\U00002903', 0}, 1630 | {"nvrtrie;", '\u22B5', '\u20D2'}, 1631 | {"nvsim;", '\u223C', '\u20D2'}, 1632 | {"nwArr;", '\U000021D6', 0}, 1633 | {"nwarhk;", '\U00002923', 0}, 1634 | {"nwarr;", '\U00002196', 0}, 1635 | {"nwarrow;", '\U00002196', 0}, 1636 | {"nwnear;", '\U00002927', 0}, 1637 | {"oS;", '\U000024C8', 0}, 1638 | {"oacute", '\U000000F3', 0}, 1639 | {"oacute;", '\U000000F3', 0}, 1640 | {"oast;", '\U0000229B', 0}, 1641 | {"ocir;", '\U0000229A', 0}, 1642 | {"ocirc", '\U000000F4', 0}, 1643 | {"ocirc;", '\U000000F4', 0}, 1644 | {"ocy;", '\U0000043E', 0}, 1645 | {"odash;", '\U0000229D', 0}, 1646 | {"odblac;", '\U00000151', 0}, 1647 | {"odiv;", '\U00002A38', 0}, 1648 | {"odot;", '\U00002299', 0}, 1649 | {"odsold;", '\U000029BC', 0}, 1650 | {"oelig;", '\U00000153', 0}, 1651 | {"ofcir;", '\U000029BF', 0}, 1652 | {"ofr;", '\U0001D52C', 0}, 1653 | {"ogon;", '\U000002DB', 0}, 1654 | {"ograve", '\U000000F2', 0}, 1655 | {"ograve;", '\U000000F2', 0}, 1656 | {"ogt;", 
'\U000029C1', 0}, 1657 | {"ohbar;", '\U000029B5', 0}, 1658 | {"ohm;", '\U000003A9', 0}, 1659 | {"oint;", '\U0000222E', 0}, 1660 | {"olarr;", '\U000021BA', 0}, 1661 | {"olcir;", '\U000029BE', 0}, 1662 | {"olcross;", '\U000029BB', 0}, 1663 | {"oline;", '\U0000203E', 0}, 1664 | {"olt;", '\U000029C0', 0}, 1665 | {"omacr;", '\U0000014D', 0}, 1666 | {"omega;", '\U000003C9', 0}, 1667 | {"omicron;", '\U000003BF', 0}, 1668 | {"omid;", '\U000029B6', 0}, 1669 | {"ominus;", '\U00002296', 0}, 1670 | {"oopf;", '\U0001D560', 0}, 1671 | {"opar;", '\U000029B7', 0}, 1672 | {"operp;", '\U000029B9', 0}, 1673 | {"oplus;", '\U00002295', 0}, 1674 | {"or;", '\U00002228', 0}, 1675 | {"orarr;", '\U000021BB', 0}, 1676 | {"ord;", '\U00002A5D', 0}, 1677 | {"order;", '\U00002134', 0}, 1678 | {"orderof;", '\U00002134', 0}, 1679 | {"ordf", '\U000000AA', 0}, 1680 | {"ordf;", '\U000000AA', 0}, 1681 | {"ordm", '\U000000BA', 0}, 1682 | {"ordm;", '\U000000BA', 0}, 1683 | {"origof;", '\U000022B6', 0}, 1684 | {"oror;", '\U00002A56', 0}, 1685 | {"orslope;", '\U00002A57', 0}, 1686 | {"orv;", '\U00002A5B', 0}, 1687 | {"oscr;", '\U00002134', 0}, 1688 | {"oslash", '\U000000F8', 0}, 1689 | {"oslash;", '\U000000F8', 0}, 1690 | {"osol;", '\U00002298', 0}, 1691 | {"otilde", '\U000000F5', 0}, 1692 | {"otilde;", '\U000000F5', 0}, 1693 | {"otimes;", '\U00002297', 0}, 1694 | {"otimesas;", '\U00002A36', 0}, 1695 | {"ouml", '\U000000F6', 0}, 1696 | {"ouml;", '\U000000F6', 0}, 1697 | {"ovbar;", '\U0000233D', 0}, 1698 | {"par;", '\U00002225', 0}, 1699 | {"para", '\U000000B6', 0}, 1700 | {"para;", '\U000000B6', 0}, 1701 | {"parallel;", '\U00002225', 0}, 1702 | {"parsim;", '\U00002AF3', 0}, 1703 | {"parsl;", '\U00002AFD', 0}, 1704 | {"part;", '\U00002202', 0}, 1705 | {"pcy;", '\U0000043F', 0}, 1706 | {"percnt;", '\U00000025', 0}, 1707 | {"period;", '\U0000002E', 0}, 1708 | {"permil;", '\U00002030', 0}, 1709 | {"perp;", '\U000022A5', 0}, 1710 | {"pertenk;", '\U00002031', 0}, 1711 | {"pfr;", '\U0001D52D', 0}, 1712 | 
{"phi;", '\U000003C6', 0}, 1713 | {"phiv;", '\U000003D5', 0}, 1714 | {"phmmat;", '\U00002133', 0}, 1715 | {"phone;", '\U0000260E', 0}, 1716 | {"pi;", '\U000003C0', 0}, 1717 | {"pitchfork;", '\U000022D4', 0}, 1718 | {"piv;", '\U000003D6', 0}, 1719 | {"planck;", '\U0000210F', 0}, 1720 | {"planckh;", '\U0000210E', 0}, 1721 | {"plankv;", '\U0000210F', 0}, 1722 | {"plus;", '\U0000002B', 0}, 1723 | {"plusacir;", '\U00002A23', 0}, 1724 | {"plusb;", '\U0000229E', 0}, 1725 | {"pluscir;", '\U00002A22', 0}, 1726 | {"plusdo;", '\U00002214', 0}, 1727 | {"plusdu;", '\U00002A25', 0}, 1728 | {"pluse;", '\U00002A72', 0}, 1729 | {"plusmn", '\U000000B1', 0}, 1730 | {"plusmn;", '\U000000B1', 0}, 1731 | {"plussim;", '\U00002A26', 0}, 1732 | {"plustwo;", '\U00002A27', 0}, 1733 | {"pm;", '\U000000B1', 0}, 1734 | {"pointint;", '\U00002A15', 0}, 1735 | {"popf;", '\U0001D561', 0}, 1736 | {"pound", '\U000000A3', 0}, 1737 | {"pound;", '\U000000A3', 0}, 1738 | {"pr;", '\U0000227A', 0}, 1739 | {"prE;", '\U00002AB3', 0}, 1740 | {"prap;", '\U00002AB7', 0}, 1741 | {"prcue;", '\U0000227C', 0}, 1742 | {"pre;", '\U00002AAF', 0}, 1743 | {"prec;", '\U0000227A', 0}, 1744 | {"precapprox;", '\U00002AB7', 0}, 1745 | {"preccurlyeq;", '\U0000227C', 0}, 1746 | {"preceq;", '\U00002AAF', 0}, 1747 | {"precnapprox;", '\U00002AB9', 0}, 1748 | {"precneqq;", '\U00002AB5', 0}, 1749 | {"precnsim;", '\U000022E8', 0}, 1750 | {"precsim;", '\U0000227E', 0}, 1751 | {"prime;", '\U00002032', 0}, 1752 | {"primes;", '\U00002119', 0}, 1753 | {"prnE;", '\U00002AB5', 0}, 1754 | {"prnap;", '\U00002AB9', 0}, 1755 | {"prnsim;", '\U000022E8', 0}, 1756 | {"prod;", '\U0000220F', 0}, 1757 | {"profalar;", '\U0000232E', 0}, 1758 | {"profline;", '\U00002312', 0}, 1759 | {"profsurf;", '\U00002313', 0}, 1760 | {"prop;", '\U0000221D', 0}, 1761 | {"propto;", '\U0000221D', 0}, 1762 | {"prsim;", '\U0000227E', 0}, 1763 | {"prurel;", '\U000022B0', 0}, 1764 | {"pscr;", '\U0001D4C5', 0}, 1765 | {"psi;", '\U000003C8', 0}, 1766 | {"puncsp;", 
'\U00002008', 0}, 1767 | {"qfr;", '\U0001D52E', 0}, 1768 | {"qint;", '\U00002A0C', 0}, 1769 | {"qopf;", '\U0001D562', 0}, 1770 | {"qprime;", '\U00002057', 0}, 1771 | {"qscr;", '\U0001D4C6', 0}, 1772 | {"quaternions;", '\U0000210D', 0}, 1773 | {"quatint;", '\U00002A16', 0}, 1774 | {"quest;", '\U0000003F', 0}, 1775 | {"questeq;", '\U0000225F', 0}, 1776 | {"quot", '\U00000022', 0}, 1777 | {"quot;", '\U00000022', 0}, 1778 | {"rAarr;", '\U000021DB', 0}, 1779 | {"rArr;", '\U000021D2', 0}, 1780 | {"rAtail;", '\U0000291C', 0}, 1781 | {"rBarr;", '\U0000290F', 0}, 1782 | {"rHar;", '\U00002964', 0}, 1783 | {"race;", '\u223D', '\u0331'}, 1784 | {"racute;", '\U00000155', 0}, 1785 | {"radic;", '\U0000221A', 0}, 1786 | {"raemptyv;", '\U000029B3', 0}, 1787 | {"rang;", '\U000027E9', 0}, 1788 | {"rangd;", '\U00002992', 0}, 1789 | {"range;", '\U000029A5', 0}, 1790 | {"rangle;", '\U000027E9', 0}, 1791 | {"raquo", '\U000000BB', 0}, 1792 | {"raquo;", '\U000000BB', 0}, 1793 | {"rarr;", '\U00002192', 0}, 1794 | {"rarrap;", '\U00002975', 0}, 1795 | {"rarrb;", '\U000021E5', 0}, 1796 | {"rarrbfs;", '\U00002920', 0}, 1797 | {"rarrc;", '\U00002933', 0}, 1798 | {"rarrfs;", '\U0000291E', 0}, 1799 | {"rarrhk;", '\U000021AA', 0}, 1800 | {"rarrlp;", '\U000021AC', 0}, 1801 | {"rarrpl;", '\U00002945', 0}, 1802 | {"rarrsim;", '\U00002974', 0}, 1803 | {"rarrtl;", '\U000021A3', 0}, 1804 | {"rarrw;", '\U0000219D', 0}, 1805 | {"ratail;", '\U0000291A', 0}, 1806 | {"ratio;", '\U00002236', 0}, 1807 | {"rationals;", '\U0000211A', 0}, 1808 | {"rbarr;", '\U0000290D', 0}, 1809 | {"rbbrk;", '\U00002773', 0}, 1810 | {"rbrace;", '\U0000007D', 0}, 1811 | {"rbrack;", '\U0000005D', 0}, 1812 | {"rbrke;", '\U0000298C', 0}, 1813 | {"rbrksld;", '\U0000298E', 0}, 1814 | {"rbrkslu;", '\U00002990', 0}, 1815 | {"rcaron;", '\U00000159', 0}, 1816 | {"rcedil;", '\U00000157', 0}, 1817 | {"rceil;", '\U00002309', 0}, 1818 | {"rcub;", '\U0000007D', 0}, 1819 | {"rcy;", '\U00000440', 0}, 1820 | {"rdca;", '\U00002937', 0}, 1821 | 
{"rdldhar;", '\U00002969', 0}, 1822 | {"rdquo;", '\U0000201D', 0}, 1823 | {"rdquor;", '\U0000201D', 0}, 1824 | {"rdsh;", '\U000021B3', 0}, 1825 | {"real;", '\U0000211C', 0}, 1826 | {"realine;", '\U0000211B', 0}, 1827 | {"realpart;", '\U0000211C', 0}, 1828 | {"reals;", '\U0000211D', 0}, 1829 | {"rect;", '\U000025AD', 0}, 1830 | {"reg", '\U000000AE', 0}, 1831 | {"reg;", '\U000000AE', 0}, 1832 | {"rfisht;", '\U0000297D', 0}, 1833 | {"rfloor;", '\U0000230B', 0}, 1834 | {"rfr;", '\U0001D52F', 0}, 1835 | {"rhard;", '\U000021C1', 0}, 1836 | {"rharu;", '\U000021C0', 0}, 1837 | {"rharul;", '\U0000296C', 0}, 1838 | {"rho;", '\U000003C1', 0}, 1839 | {"rhov;", '\U000003F1', 0}, 1840 | {"rightarrow;", '\U00002192', 0}, 1841 | {"rightarrowtail;", '\U000021A3', 0}, 1842 | {"rightharpoondown;", '\U000021C1', 0}, 1843 | {"rightharpoonup;", '\U000021C0', 0}, 1844 | {"rightleftarrows;", '\U000021C4', 0}, 1845 | {"rightleftharpoons;", '\U000021CC', 0}, 1846 | {"rightrightarrows;", '\U000021C9', 0}, 1847 | {"rightsquigarrow;", '\U0000219D', 0}, 1848 | {"rightthreetimes;", '\U000022CC', 0}, 1849 | {"ring;", '\U000002DA', 0}, 1850 | {"risingdotseq;", '\U00002253', 0}, 1851 | {"rlarr;", '\U000021C4', 0}, 1852 | {"rlhar;", '\U000021CC', 0}, 1853 | {"rlm;", '\U0000200F', 0}, 1854 | {"rmoust;", '\U000023B1', 0}, 1855 | {"rmoustache;", '\U000023B1', 0}, 1856 | {"rnmid;", '\U00002AEE', 0}, 1857 | {"roang;", '\U000027ED', 0}, 1858 | {"roarr;", '\U000021FE', 0}, 1859 | {"robrk;", '\U000027E7', 0}, 1860 | {"ropar;", '\U00002986', 0}, 1861 | {"ropf;", '\U0001D563', 0}, 1862 | {"roplus;", '\U00002A2E', 0}, 1863 | {"rotimes;", '\U00002A35', 0}, 1864 | {"rpar;", '\U00000029', 0}, 1865 | {"rpargt;", '\U00002994', 0}, 1866 | {"rppolint;", '\U00002A12', 0}, 1867 | {"rrarr;", '\U000021C9', 0}, 1868 | {"rsaquo;", '\U0000203A', 0}, 1869 | {"rscr;", '\U0001D4C7', 0}, 1870 | {"rsh;", '\U000021B1', 0}, 1871 | {"rsqb;", '\U0000005D', 0}, 1872 | {"rsquo;", '\U00002019', 0}, 1873 | {"rsquor;", '\U00002019', 0}, 
1874 | {"rthree;", '\U000022CC', 0}, 1875 | {"rtimes;", '\U000022CA', 0}, 1876 | {"rtri;", '\U000025B9', 0}, 1877 | {"rtrie;", '\U000022B5', 0}, 1878 | {"rtrif;", '\U000025B8', 0}, 1879 | {"rtriltri;", '\U000029CE', 0}, 1880 | {"ruluhar;", '\U00002968', 0}, 1881 | {"rx;", '\U0000211E', 0}, 1882 | {"sacute;", '\U0000015B', 0}, 1883 | {"sbquo;", '\U0000201A', 0}, 1884 | {"sc;", '\U0000227B', 0}, 1885 | {"scE;", '\U00002AB4', 0}, 1886 | {"scap;", '\U00002AB8', 0}, 1887 | {"scaron;", '\U00000161', 0}, 1888 | {"sccue;", '\U0000227D', 0}, 1889 | {"sce;", '\U00002AB0', 0}, 1890 | {"scedil;", '\U0000015F', 0}, 1891 | {"scirc;", '\U0000015D', 0}, 1892 | {"scnE;", '\U00002AB6', 0}, 1893 | {"scnap;", '\U00002ABA', 0}, 1894 | {"scnsim;", '\U000022E9', 0}, 1895 | {"scpolint;", '\U00002A13', 0}, 1896 | {"scsim;", '\U0000227F', 0}, 1897 | {"scy;", '\U00000441', 0}, 1898 | {"sdot;", '\U000022C5', 0}, 1899 | {"sdotb;", '\U000022A1', 0}, 1900 | {"sdote;", '\U00002A66', 0}, 1901 | {"seArr;", '\U000021D8', 0}, 1902 | {"searhk;", '\U00002925', 0}, 1903 | {"searr;", '\U00002198', 0}, 1904 | {"searrow;", '\U00002198', 0}, 1905 | {"sect", '\U000000A7', 0}, 1906 | {"sect;", '\U000000A7', 0}, 1907 | {"semi;", '\U0000003B', 0}, 1908 | {"seswar;", '\U00002929', 0}, 1909 | {"setminus;", '\U00002216', 0}, 1910 | {"setmn;", '\U00002216', 0}, 1911 | {"sext;", '\U00002736', 0}, 1912 | {"sfr;", '\U0001D530', 0}, 1913 | {"sfrown;", '\U00002322', 0}, 1914 | {"sharp;", '\U0000266F', 0}, 1915 | {"shchcy;", '\U00000449', 0}, 1916 | {"shcy;", '\U00000448', 0}, 1917 | {"shortmid;", '\U00002223', 0}, 1918 | {"shortparallel;", '\U00002225', 0}, 1919 | {"shy", '\U000000AD', 0}, 1920 | {"shy;", '\U000000AD', 0}, 1921 | {"sigma;", '\U000003C3', 0}, 1922 | {"sigmaf;", '\U000003C2', 0}, 1923 | {"sigmav;", '\U000003C2', 0}, 1924 | {"sim;", '\U0000223C', 0}, 1925 | {"simdot;", '\U00002A6A', 0}, 1926 | {"sime;", '\U00002243', 0}, 1927 | {"simeq;", '\U00002243', 0}, 1928 | {"simg;", '\U00002A9E', 0}, 1929 | 
{"simgE;", '\U00002AA0', 0}, 1930 | {"siml;", '\U00002A9D', 0}, 1931 | {"simlE;", '\U00002A9F', 0}, 1932 | {"simne;", '\U00002246', 0}, 1933 | {"simplus;", '\U00002A24', 0}, 1934 | {"simrarr;", '\U00002972', 0}, 1935 | {"slarr;", '\U00002190', 0}, 1936 | {"smallsetminus;", '\U00002216', 0}, 1937 | {"smashp;", '\U00002A33', 0}, 1938 | {"smeparsl;", '\U000029E4', 0}, 1939 | {"smid;", '\U00002223', 0}, 1940 | {"smile;", '\U00002323', 0}, 1941 | {"smt;", '\U00002AAA', 0}, 1942 | {"smte;", '\U00002AAC', 0}, 1943 | {"smtes;", '\u2AAC', '\uFE00'}, 1944 | {"softcy;", '\U0000044C', 0}, 1945 | {"sol;", '\U0000002F', 0}, 1946 | {"solb;", '\U000029C4', 0}, 1947 | {"solbar;", '\U0000233F', 0}, 1948 | {"sopf;", '\U0001D564', 0}, 1949 | {"spades;", '\U00002660', 0}, 1950 | {"spadesuit;", '\U00002660', 0}, 1951 | {"spar;", '\U00002225', 0}, 1952 | {"sqcap;", '\U00002293', 0}, 1953 | {"sqcaps;", '\u2293', '\uFE00'}, 1954 | {"sqcup;", '\U00002294', 0}, 1955 | {"sqcups;", '\u2294', '\uFE00'}, 1956 | {"sqsub;", '\U0000228F', 0}, 1957 | {"sqsube;", '\U00002291', 0}, 1958 | {"sqsubset;", '\U0000228F', 0}, 1959 | {"sqsubseteq;", '\U00002291', 0}, 1960 | {"sqsup;", '\U00002290', 0}, 1961 | {"sqsupe;", '\U00002292', 0}, 1962 | {"sqsupset;", '\U00002290', 0}, 1963 | {"sqsupseteq;", '\U00002292', 0}, 1964 | {"squ;", '\U000025A1', 0}, 1965 | {"square;", '\U000025A1', 0}, 1966 | {"squarf;", '\U000025AA', 0}, 1967 | {"squf;", '\U000025AA', 0}, 1968 | {"srarr;", '\U00002192', 0}, 1969 | {"sscr;", '\U0001D4C8', 0}, 1970 | {"ssetmn;", '\U00002216', 0}, 1971 | {"ssmile;", '\U00002323', 0}, 1972 | {"sstarf;", '\U000022C6', 0}, 1973 | {"star;", '\U00002606', 0}, 1974 | {"starf;", '\U00002605', 0}, 1975 | {"straightepsilon;", '\U000003F5', 0}, 1976 | {"straightphi;", '\U000003D5', 0}, 1977 | {"strns;", '\U000000AF', 0}, 1978 | {"sub;", '\U00002282', 0}, 1979 | {"subE;", '\U00002AC5', 0}, 1980 | {"subdot;", '\U00002ABD', 0}, 1981 | {"sube;", '\U00002286', 0}, 1982 | {"subedot;", '\U00002AC3', 0}, 1983 
| {"submult;", '\U00002AC1', 0}, 1984 | {"subnE;", '\U00002ACB', 0}, 1985 | {"subne;", '\U0000228A', 0}, 1986 | {"subplus;", '\U00002ABF', 0}, 1987 | {"subrarr;", '\U00002979', 0}, 1988 | {"subset;", '\U00002282', 0}, 1989 | {"subseteq;", '\U00002286', 0}, 1990 | {"subseteqq;", '\U00002AC5', 0}, 1991 | {"subsetneq;", '\U0000228A', 0}, 1992 | {"subsetneqq;", '\U00002ACB', 0}, 1993 | {"subsim;", '\U00002AC7', 0}, 1994 | {"subsub;", '\U00002AD5', 0}, 1995 | {"subsup;", '\U00002AD3', 0}, 1996 | {"succ;", '\U0000227B', 0}, 1997 | {"succapprox;", '\U00002AB8', 0}, 1998 | {"succcurlyeq;", '\U0000227D', 0}, 1999 | {"succeq;", '\U00002AB0', 0}, 2000 | {"succnapprox;", '\U00002ABA', 0}, 2001 | {"succneqq;", '\U00002AB6', 0}, 2002 | {"succnsim;", '\U000022E9', 0}, 2003 | {"succsim;", '\U0000227F', 0}, 2004 | {"sum;", '\U00002211', 0}, 2005 | {"sung;", '\U0000266A', 0}, 2006 | {"sup1", '\U000000B9', 0}, 2007 | {"sup1;", '\U000000B9', 0}, 2008 | {"sup2", '\U000000B2', 0}, 2009 | {"sup2;", '\U000000B2', 0}, 2010 | {"sup3", '\U000000B3', 0}, 2011 | {"sup3;", '\U000000B3', 0}, 2012 | {"sup;", '\U00002283', 0}, 2013 | {"supE;", '\U00002AC6', 0}, 2014 | {"supdot;", '\U00002ABE', 0}, 2015 | {"supdsub;", '\U00002AD8', 0}, 2016 | {"supe;", '\U00002287', 0}, 2017 | {"supedot;", '\U00002AC4', 0}, 2018 | {"suphsol;", '\U000027C9', 0}, 2019 | {"suphsub;", '\U00002AD7', 0}, 2020 | {"suplarr;", '\U0000297B', 0}, 2021 | {"supmult;", '\U00002AC2', 0}, 2022 | {"supnE;", '\U00002ACC', 0}, 2023 | {"supne;", '\U0000228B', 0}, 2024 | {"supplus;", '\U00002AC0', 0}, 2025 | {"supset;", '\U00002283', 0}, 2026 | {"supseteq;", '\U00002287', 0}, 2027 | {"supseteqq;", '\U00002AC6', 0}, 2028 | {"supsetneq;", '\U0000228B', 0}, 2029 | {"supsetneqq;", '\U00002ACC', 0}, 2030 | {"supsim;", '\U00002AC8', 0}, 2031 | {"supsub;", '\U00002AD4', 0}, 2032 | {"supsup;", '\U00002AD6', 0}, 2033 | {"swArr;", '\U000021D9', 0}, 2034 | {"swarhk;", '\U00002926', 0}, 2035 | {"swarr;", '\U00002199', 0}, 2036 | {"swarrow;", 
'\U00002199', 0}, 2037 | {"swnwar;", '\U0000292A', 0}, 2038 | {"szlig", '\U000000DF', 0}, 2039 | {"szlig;", '\U000000DF', 0}, 2040 | {"target;", '\U00002316', 0}, 2041 | {"tau;", '\U000003C4', 0}, 2042 | {"tbrk;", '\U000023B4', 0}, 2043 | {"tcaron;", '\U00000165', 0}, 2044 | {"tcedil;", '\U00000163', 0}, 2045 | {"tcy;", '\U00000442', 0}, 2046 | {"tdot;", '\U000020DB', 0}, 2047 | {"telrec;", '\U00002315', 0}, 2048 | {"tfr;", '\U0001D531', 0}, 2049 | {"there4;", '\U00002234', 0}, 2050 | {"therefore;", '\U00002234', 0}, 2051 | {"theta;", '\U000003B8', 0}, 2052 | {"thetasym;", '\U000003D1', 0}, 2053 | {"thetav;", '\U000003D1', 0}, 2054 | {"thickapprox;", '\U00002248', 0}, 2055 | {"thicksim;", '\U0000223C', 0}, 2056 | {"thinsp;", '\U00002009', 0}, 2057 | {"thkap;", '\U00002248', 0}, 2058 | {"thksim;", '\U0000223C', 0}, 2059 | {"thorn", '\U000000FE', 0}, 2060 | {"thorn;", '\U000000FE', 0}, 2061 | {"tilde;", '\U000002DC', 0}, 2062 | {"times", '\U000000D7', 0}, 2063 | {"times;", '\U000000D7', 0}, 2064 | {"timesb;", '\U000022A0', 0}, 2065 | {"timesbar;", '\U00002A31', 0}, 2066 | {"timesd;", '\U00002A30', 0}, 2067 | {"tint;", '\U0000222D', 0}, 2068 | {"toea;", '\U00002928', 0}, 2069 | {"top;", '\U000022A4', 0}, 2070 | {"topbot;", '\U00002336', 0}, 2071 | {"topcir;", '\U00002AF1', 0}, 2072 | {"topf;", '\U0001D565', 0}, 2073 | {"topfork;", '\U00002ADA', 0}, 2074 | {"tosa;", '\U00002929', 0}, 2075 | {"tprime;", '\U00002034', 0}, 2076 | {"trade;", '\U00002122', 0}, 2077 | {"triangle;", '\U000025B5', 0}, 2078 | {"triangledown;", '\U000025BF', 0}, 2079 | {"triangleleft;", '\U000025C3', 0}, 2080 | {"trianglelefteq;", '\U000022B4', 0}, 2081 | {"triangleq;", '\U0000225C', 0}, 2082 | {"triangleright;", '\U000025B9', 0}, 2083 | {"trianglerighteq;", '\U000022B5', 0}, 2084 | {"tridot;", '\U000025EC', 0}, 2085 | {"trie;", '\U0000225C', 0}, 2086 | {"triminus;", '\U00002A3A', 0}, 2087 | {"triplus;", '\U00002A39', 0}, 2088 | {"trisb;", '\U000029CD', 0}, 2089 | {"tritime;", '\U00002A3B', 0}, 
2090 | {"trpezium;", '\U000023E2', 0}, 2091 | {"tscr;", '\U0001D4C9', 0}, 2092 | {"tscy;", '\U00000446', 0}, 2093 | {"tshcy;", '\U0000045B', 0}, 2094 | {"tstrok;", '\U00000167', 0}, 2095 | {"twixt;", '\U0000226C', 0}, 2096 | {"twoheadleftarrow;", '\U0000219E', 0}, 2097 | {"twoheadrightarrow;", '\U000021A0', 0}, 2098 | {"uArr;", '\U000021D1', 0}, 2099 | {"uHar;", '\U00002963', 0}, 2100 | {"uacute", '\U000000FA', 0}, 2101 | {"uacute;", '\U000000FA', 0}, 2102 | {"uarr;", '\U00002191', 0}, 2103 | {"ubrcy;", '\U0000045E', 0}, 2104 | {"ubreve;", '\U0000016D', 0}, 2105 | {"ucirc", '\U000000FB', 0}, 2106 | {"ucirc;", '\U000000FB', 0}, 2107 | {"ucy;", '\U00000443', 0}, 2108 | {"udarr;", '\U000021C5', 0}, 2109 | {"udblac;", '\U00000171', 0}, 2110 | {"udhar;", '\U0000296E', 0}, 2111 | {"ufisht;", '\U0000297E', 0}, 2112 | {"ufr;", '\U0001D532', 0}, 2113 | {"ugrave", '\U000000F9', 0}, 2114 | {"ugrave;", '\U000000F9', 0}, 2115 | {"uharl;", '\U000021BF', 0}, 2116 | {"uharr;", '\U000021BE', 0}, 2117 | {"uhblk;", '\U00002580', 0}, 2118 | {"ulcorn;", '\U0000231C', 0}, 2119 | {"ulcorner;", '\U0000231C', 0}, 2120 | {"ulcrop;", '\U0000230F', 0}, 2121 | {"ultri;", '\U000025F8', 0}, 2122 | {"umacr;", '\U0000016B', 0}, 2123 | {"uml", '\U000000A8', 0}, 2124 | {"uml;", '\U000000A8', 0}, 2125 | {"uogon;", '\U00000173', 0}, 2126 | {"uopf;", '\U0001D566', 0}, 2127 | {"uparrow;", '\U00002191', 0}, 2128 | {"updownarrow;", '\U00002195', 0}, 2129 | {"upharpoonleft;", '\U000021BF', 0}, 2130 | {"upharpoonright;", '\U000021BE', 0}, 2131 | {"uplus;", '\U0000228E', 0}, 2132 | {"upsi;", '\U000003C5', 0}, 2133 | {"upsih;", '\U000003D2', 0}, 2134 | {"upsilon;", '\U000003C5', 0}, 2135 | {"upuparrows;", '\U000021C8', 0}, 2136 | {"urcorn;", '\U0000231D', 0}, 2137 | {"urcorner;", '\U0000231D', 0}, 2138 | {"urcrop;", '\U0000230E', 0}, 2139 | {"uring;", '\U0000016F', 0}, 2140 | {"urtri;", '\U000025F9', 0}, 2141 | {"uscr;", '\U0001D4CA', 0}, 2142 | {"utdot;", '\U000022F0', 0}, 2143 | {"utilde;", '\U00000169', 
0}, 2144 | {"utri;", '\U000025B5', 0}, 2145 | {"utrif;", '\U000025B4', 0}, 2146 | {"uuarr;", '\U000021C8', 0}, 2147 | {"uuml", '\U000000FC', 0}, 2148 | {"uuml;", '\U000000FC', 0}, 2149 | {"uwangle;", '\U000029A7', 0}, 2150 | {"vArr;", '\U000021D5', 0}, 2151 | {"vBar;", '\U00002AE8', 0}, 2152 | {"vBarv;", '\U00002AE9', 0}, 2153 | {"vDash;", '\U000022A8', 0}, 2154 | {"vangrt;", '\U0000299C', 0}, 2155 | {"varepsilon;", '\U000003F5', 0}, 2156 | {"varkappa;", '\U000003F0', 0}, 2157 | {"varnothing;", '\U00002205', 0}, 2158 | {"varphi;", '\U000003D5', 0}, 2159 | {"varpi;", '\U000003D6', 0}, 2160 | {"varpropto;", '\U0000221D', 0}, 2161 | {"varr;", '\U00002195', 0}, 2162 | {"varrho;", '\U000003F1', 0}, 2163 | {"varsigma;", '\U000003C2', 0}, 2164 | {"varsubsetneq;", '\u228A', '\uFE00'}, 2165 | {"varsubsetneqq;", '\u2ACB', '\uFE00'}, 2166 | {"varsupsetneq;", '\u228B', '\uFE00'}, 2167 | {"varsupsetneqq;", '\u2ACC', '\uFE00'}, 2168 | {"vartheta;", '\U000003D1', 0}, 2169 | {"vartriangleleft;", '\U000022B2', 0}, 2170 | {"vartriangleright;", '\U000022B3', 0}, 2171 | {"vcy;", '\U00000432', 0}, 2172 | {"vdash;", '\U000022A2', 0}, 2173 | {"vee;", '\U00002228', 0}, 2174 | {"veebar;", '\U000022BB', 0}, 2175 | {"veeeq;", '\U0000225A', 0}, 2176 | {"vellip;", '\U000022EE', 0}, 2177 | {"verbar;", '\U0000007C', 0}, 2178 | {"vert;", '\U0000007C', 0}, 2179 | {"vfr;", '\U0001D533', 0}, 2180 | {"vltri;", '\U000022B2', 0}, 2181 | {"vnsub;", '\u2282', '\u20D2'}, 2182 | {"vnsup;", '\u2283', '\u20D2'}, 2183 | {"vopf;", '\U0001D567', 0}, 2184 | {"vprop;", '\U0000221D', 0}, 2185 | {"vrtri;", '\U000022B3', 0}, 2186 | {"vscr;", '\U0001D4CB', 0}, 2187 | {"vsubnE;", '\u2ACB', '\uFE00'}, 2188 | {"vsubne;", '\u228A', '\uFE00'}, 2189 | {"vsupnE;", '\u2ACC', '\uFE00'}, 2190 | {"vsupne;", '\u228B', '\uFE00'}, 2191 | {"vzigzag;", '\U0000299A', 0}, 2192 | {"wcirc;", '\U00000175', 0}, 2193 | {"wedbar;", '\U00002A5F', 0}, 2194 | {"wedge;", '\U00002227', 0}, 2195 | {"wedgeq;", '\U00002259', 0}, 2196 | {"weierp;", 
'\U00002118', 0}, 2197 | {"wfr;", '\U0001D534', 0}, 2198 | {"wopf;", '\U0001D568', 0}, 2199 | {"wp;", '\U00002118', 0}, 2200 | {"wr;", '\U00002240', 0}, 2201 | {"wreath;", '\U00002240', 0}, 2202 | {"wscr;", '\U0001D4CC', 0}, 2203 | {"xcap;", '\U000022C2', 0}, 2204 | {"xcirc;", '\U000025EF', 0}, 2205 | {"xcup;", '\U000022C3', 0}, 2206 | {"xdtri;", '\U000025BD', 0}, 2207 | {"xfr;", '\U0001D535', 0}, 2208 | {"xhArr;", '\U000027FA', 0}, 2209 | {"xharr;", '\U000027F7', 0}, 2210 | {"xi;", '\U000003BE', 0}, 2211 | {"xlArr;", '\U000027F8', 0}, 2212 | {"xlarr;", '\U000027F5', 0}, 2213 | {"xmap;", '\U000027FC', 0}, 2214 | {"xnis;", '\U000022FB', 0}, 2215 | {"xodot;", '\U00002A00', 0}, 2216 | {"xopf;", '\U0001D569', 0}, 2217 | {"xoplus;", '\U00002A01', 0}, 2218 | {"xotime;", '\U00002A02', 0}, 2219 | {"xrArr;", '\U000027F9', 0}, 2220 | {"xrarr;", '\U000027F6', 0}, 2221 | {"xscr;", '\U0001D4CD', 0}, 2222 | {"xsqcup;", '\U00002A06', 0}, 2223 | {"xuplus;", '\U00002A04', 0}, 2224 | {"xutri;", '\U000025B3', 0}, 2225 | {"xvee;", '\U000022C1', 0}, 2226 | {"xwedge;", '\U000022C0', 0}, 2227 | {"yacute", '\U000000FD', 0}, 2228 | {"yacute;", '\U000000FD', 0}, 2229 | {"yacy;", '\U0000044F', 0}, 2230 | {"ycirc;", '\U00000177', 0}, 2231 | {"ycy;", '\U0000044B', 0}, 2232 | {"yen", '\U000000A5', 0}, 2233 | {"yen;", '\U000000A5', 0}, 2234 | {"yfr;", '\U0001D536', 0}, 2235 | {"yicy;", '\U00000457', 0}, 2236 | {"yopf;", '\U0001D56A', 0}, 2237 | {"yscr;", '\U0001D4CE', 0}, 2238 | {"yucy;", '\U0000044E', 0}, 2239 | {"yuml", '\U000000FF', 0}, 2240 | {"yuml;", '\U000000FF', 0}, 2241 | {"zacute;", '\U0000017A', 0}, 2242 | {"zcaron;", '\U0000017E', 0}, 2243 | {"zcy;", '\U00000437', 0}, 2244 | {"zdot;", '\U0000017C', 0}, 2245 | {"zeetrf;", '\U00002128', 0}, 2246 | {"zeta;", '\U000003B6', 0}, 2247 | {"zfr;", '\U0001D537', 0}, 2248 | {"zhcy;", '\U00000436', 0}, 2249 | {"zigrarr;", '\U000021DD', 0}, 2250 | {"zopf;", '\U0001D56B', 0}, 2251 | {"zscr;", '\U0001D4CF', 0}, 2252 | {"zwj;", '\U0000200D', 0}, 
2253 | {"zwnj;", '\U0000200C', 0}, 2254 | } 2255 | -------------------------------------------------------------------------------- /euc-jp.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "unicode/utf8" 5 | ) 6 | 7 | // Converters for the EUC-JP encoding 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "EUC-JP", 12 | Aliases: []string{"extended_unix_code_packed_format_for_japanese", "cseucpkdfmtjapanese"}, 13 | NewDecoder: func() Decoder { 14 | return decodeEucJP 15 | }, 16 | NewEncoder: func() Encoder { 17 | jis0208Table.Reverse() 18 | jis0212Table.Reverse() 19 | return encodeEucJP 20 | }, 21 | }) 22 | } 23 | 24 | func decodeEucJP(p []byte) (c rune, size int, status Status) { 25 | if len(p) == 0 { 26 | return 0, 0, NO_ROOM 27 | } 28 | 29 | b := p[0] 30 | switch { 31 | case b < 0x80: 32 | return rune(b), 1, SUCCESS 33 | 34 | case b == 0x8e: 35 | if len(p) < 2 { 36 | return 0, 0, NO_ROOM 37 | } 38 | b2 := p[1] 39 | if b2 < 0xa1 || b2 > 0xdf { 40 | return utf8.RuneError, 1, INVALID_CHAR 41 | } 42 | return rune(b2) + (0xff61 - 0xa1), 2, SUCCESS 43 | 44 | case b == 0x8f: 45 | if len(p) < 3 { 46 | return 0, 0, NO_ROOM 47 | } 48 | c, size, status = jis0212Table.DecodeHigh(p[1:3]) 49 | if status == SUCCESS { 50 | size = 3 51 | } 52 | return 53 | 54 | case 0xa1 <= b && b <= 0xfe: 55 | return jis0208Table.DecodeHigh(p) 56 | } 57 | 58 | return utf8.RuneError, 1, INVALID_CHAR 59 | } 60 | 61 | func encodeEucJP(p []byte, c rune) (size int, status Status) { 62 | if len(p) == 0 { 63 | return 0, NO_ROOM 64 | } 65 | 66 | if c < 0x80 { 67 | p[0] = byte(c) 68 | return 1, SUCCESS 69 | } 70 | 71 | if len(p) < 2 { 72 | return 0, NO_ROOM 73 | } 74 | 75 | if c > 0xffff { 76 | p[0] = '?' 
77 | return 1, INVALID_CHAR 78 | } 79 | 80 | if 0xff61 <= c && c <= 0xff9f { 81 | p[0] = 0x8e 82 | p[1] = byte(c - (0xff61 - 0xa1)) 83 | return 2, SUCCESS 84 | } 85 | 86 | size, status = jis0208Table.EncodeHigh(p, c) 87 | if status == SUCCESS { 88 | return size, status 89 | } 90 | 91 | size, status = jis0212Table.EncodeHigh(p[1:], c) 92 | switch status { 93 | case SUCCESS: 94 | p[0] = 0x8f 95 | return size + 1, SUCCESS 96 | 97 | case INVALID_CHAR: 98 | p[0] = '?' 99 | return 1, INVALID_CHAR 100 | } 101 | return size, status 102 | } 103 | -------------------------------------------------------------------------------- /euc-kr.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for the EUC-KR encoding. 4 | 5 | import ( 6 | "unicode/utf8" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "EUC-KR", 12 | Aliases: []string{ 13 | "ibm-1363", 14 | "KS_C_5601-1987", 15 | "KS_C_5601-1989", 16 | "KSC_5601", 17 | "Korean", 18 | "iso-ir-149", 19 | "cp1363", 20 | "5601", 21 | "ksc", 22 | "windows-949", 23 | "ibm-970", 24 | "cp970", 25 | "970", 26 | "cp949", 27 | }, 28 | NewDecoder: func() Decoder { 29 | return decodeEucKr 30 | }, 31 | NewEncoder: func() Encoder { 32 | eucKrOnce.Do(reverseEucKrTable) 33 | return encodeEucKr 34 | }, 35 | }) 36 | } 37 | 38 | func decodeEucKr(p []byte) (c rune, size int, status Status) { 39 | if len(p) == 0 { 40 | return 0, 0, NO_ROOM 41 | } 42 | 43 | b := p[0] 44 | if b < 0x80 { 45 | return rune(b), 1, SUCCESS 46 | } 47 | 48 | if len(p) < 2 { 49 | return 0, 0, NO_ROOM 50 | } 51 | 52 | euc := int(b)<<8 + int(p[1]) 53 | c = rune(eucKrToUnicode[euc]) 54 | 55 | if c == 0 { 56 | return utf8.RuneError, 2, INVALID_CHAR 57 | } 58 | return c, 2, SUCCESS 59 | } 60 | 61 | func encodeEucKr(p []byte, c rune) (size int, status Status) { 62 | if len(p) == 0 { 63 | return 0, NO_ROOM 64 | } 65 | 66 | if c < 0x80 { 67 | p[0] = byte(c) 68 | return 1, SUCCESS 69 | } 70 | 71 
| if len(p) < 2 { 72 | return 0, NO_ROOM 73 | } 74 | 75 | if c > 0xffff { 76 | p[0] = '?' 77 | return 1, INVALID_CHAR 78 | } 79 | 80 | euc := unicodeToEucKr[c] 81 | if euc == 0 { 82 | p[0] = '?' 83 | return 1, INVALID_CHAR 84 | } 85 | 86 | p[0] = byte(euc >> 8) 87 | p[1] = byte(euc) 88 | return 2, SUCCESS 89 | } 90 | -------------------------------------------------------------------------------- /fallback.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // FallbackDecoder combines a series of Decoders into one. 4 | // If the first Decoder returns a status of INVALID_CHAR, the others are tried as well. 5 | // 6 | // Note: if the text to be decoded ends with a sequence of bytes that is not a valid character in the first charset, 7 | // but it could be the beginning of a valid character, the FallbackDecoder will give a status of NO_ROOM instead of 8 | // falling back to the other Decoders. 9 | func FallbackDecoder(decoders ...Decoder) Decoder { 10 | return func(p []byte) (c rune, size int, status Status) { 11 | for _, d := range decoders { 12 | c, size, status = d(p) 13 | if status != INVALID_CHAR { 14 | return 15 | } 16 | } 17 | return 0, 1, INVALID_CHAR 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /gb18030.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | // Converters for GB18030 encoding. 
8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "GB18030", 12 | NewDecoder: func() Decoder { 13 | gb18030Once.Do(buildGB18030Tables) 14 | return decodeGB18030Rune 15 | }, 16 | NewEncoder: func() Encoder { 17 | gb18030Once.Do(buildGB18030Tables) 18 | return encodeGB18030Rune 19 | }, 20 | }) 21 | } 22 | 23 | func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { 24 | if len(p) == 0 { 25 | status = NO_ROOM 26 | return 27 | } 28 | 29 | b := p[0] 30 | if b < 128 { 31 | return rune(b), 1, SUCCESS 32 | } 33 | 34 | if len(p) < 2 { 35 | status = NO_ROOM 36 | return 37 | } 38 | 39 | if p[0] < 0x81 || p[0] > 0xfe { 40 | return 0xfffd, 1, INVALID_CHAR 41 | } 42 | 43 | if p[1] >= 0x40 { 44 | // 2-byte character 45 | c := uint16(p[0])<<8 + uint16(p[1]) 46 | r = rune(gbkToUnicode[c]) 47 | if r == 0 { 48 | r = gbkToUnicodeExtra[c] 49 | } 50 | 51 | if r != 0 { 52 | return r, 2, SUCCESS 53 | } 54 | } else if p[1] >= 0x30 { 55 | // 4-byte character 56 | if len(p) < 4 { 57 | return 0, 0, NO_ROOM 58 | } 59 | if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { 60 | return 0xfffd, 1, INVALID_CHAR 61 | } 62 | 63 | code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) 64 | lin := gb18030Linear(code) 65 | 66 | if lin <= maxGB18030Linear { 67 | r = rune(gb18030LinearToUnicode[lin]) 68 | if r != 0 { 69 | return r, 4, SUCCESS 70 | } 71 | } 72 | 73 | for _, rng := range gb18030Ranges { 74 | if lin >= rng.firstGB && lin <= rng.lastGB { 75 | return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS 76 | } 77 | } 78 | } 79 | 80 | return 0xfffd, 1, INVALID_CHAR 81 | } 82 | 83 | func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { 84 | if len(p) == 0 { 85 | status = NO_ROOM 86 | return 87 | } 88 | 89 | if r < 128 { 90 | p[0] = byte(r) 91 | return 1, SUCCESS 92 | } 93 | 94 | if len(p) < 2 { 95 | status = NO_ROOM 96 | return 97 | } 98 | 99 | var c uint16 100 | if r < 0x10000 { 101 | c = unicodeToGBK[r] 102 | } else { 
103 | c = unicodeToGBKExtra[r] 104 | } 105 | 106 | if c != 0 { 107 | p[0] = byte(c >> 8) 108 | p[1] = byte(c) 109 | return 2, SUCCESS 110 | } 111 | 112 | if len(p) < 4 { 113 | return 0, NO_ROOM 114 | } 115 | 116 | if r < 0x10000 { 117 | f := unicodeToGB18030[r] 118 | if f != 0 { 119 | p[0] = byte(f >> 24) 120 | p[1] = byte(f >> 16) 121 | p[2] = byte(f >> 8) 122 | p[3] = byte(f) 123 | return 4, SUCCESS 124 | } 125 | } 126 | 127 | for _, rng := range gb18030Ranges { 128 | if r >= rng.firstRune && r <= rng.lastRune { 129 | lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) 130 | p[0] = byte(lin/(10*126*10)) + 0x81 131 | p[1] = byte(lin/(126*10)%10) + 0x30 132 | p[2] = byte(lin/10%126) + 0x81 133 | p[3] = byte(lin%10) + 0x30 134 | return 4, SUCCESS 135 | } 136 | } 137 | 138 | p[0] = 0x1a 139 | return 1, INVALID_CHAR 140 | } 141 | 142 | var gb18030Once sync.Once 143 | 144 | // Mapping from gb18039Linear values to Unicode. 145 | var gb18030LinearToUnicode []uint16 146 | 147 | var unicodeToGB18030 []uint32 148 | 149 | func buildGB18030Tables() { 150 | gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) 151 | unicodeToGB18030 = make([]uint32, 65536) 152 | for _, data := range gb18030Data { 153 | gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode 154 | unicodeToGB18030[data.unicode] = data.gb18030 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /gbk.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for GBK encoding. 4 | 5 | func init() { 6 | RegisterCharset(&Charset{ 7 | Name: "GBK", 8 | Aliases: []string{"GB2312"}, // GBK is a superset of GB2312. 
// init registers the ISO-2022-JP charset. Both the decoder and the encoder are
// stateful closures: each one remembers which character set is currently
// selected and starts out in ASCII.
func init() {
	// jpEncoding identifies the character set currently selected by an escape
	// sequence.
	type jpEncoding int
	const (
		ascii jpEncoding = iota
		jisX0201Roman
		jisX0208
	)

	RegisterCharset(&Charset{
		Name: "ISO-2022-JP",
		NewDecoder: func() Decoder {
			// Per-decoder shift state, updated by escape sequences.
			encoding := ascii
			return func(p []byte) (c rune, size int, status Status) {
				if len(p) == 0 {
					return 0, 0, NO_ROOM
				}

				b := p[0]
				if b == esc {
					// The escape sequences handled here are all three bytes long.
					if len(p) < 3 {
						return 0, 0, NO_ROOM
					}
					switch p[1] {
					case '(':
						switch p[2] {
						case 'B':
							// ESC ( B selects ASCII.
							encoding = ascii
							return 0, 3, STATE_ONLY

						case 'J':
							// ESC ( J selects JIS X 0201 Roman.
							encoding = jisX0201Roman
							return 0, 3, STATE_ONLY
						}

					case '$':
						switch p[2] {
						case '@', 'B':
							// ESC $ @ and ESC $ B both select JIS X 0208.
							encoding = jisX0208
							return 0, 3, STATE_ONLY
						}
					}
					// Any other escape sequence is not consumed here: the ESC
					// byte falls through and is decoded under the current
					// encoding like an ordinary byte.
				}

				switch encoding {
				case ascii:
					if b > 127 {
						return utf8.RuneError, 1, INVALID_CHAR
					}
					return rune(b), 1, SUCCESS

				case jisX0201Roman:
					if b > 127 {
						return utf8.RuneError, 1, INVALID_CHAR
					}
					// JIS X 0201 Roman differs from ASCII in two positions.
					switch b {
					case '\\':
						return 0xA5, 1, SUCCESS // YEN SIGN
					case '~':
						return 0x203E, 1, SUCCESS // OVERLINE
					}
					return rune(b), 1, SUCCESS

				case jisX0208:
					return jis0208Table.DecodeLow(p)
				}
				panic("unreachable")
			}
		},
		NewEncoder: func() Encoder {
			// FromUnicode is only populated once Reverse has run.
			jis0208Table.Reverse()
			// Per-encoder shift state; output starts in ASCII.
			encoding := ascii
			return func(p []byte, c rune) (size int, status Status) {
				if len(p) == 0 {
					return 0, NO_ROOM
				}

				if c < 128 {
					if encoding != ascii {
						// Emit ESC ( B together with the character so the
						// switch back to ASCII and the byte are written as one
						// unit.
						if len(p) < 4 {
							return 0, NO_ROOM
						}
						p[0], p[1], p[2] = esc, '(', 'B'
						p[3] = byte(c)
						encoding = ascii
						return 4, SUCCESS
					}
					p[0] = byte(c)
					return 1, SUCCESS
				}

				// NOTE(review): unencodable characters are reported with a
				// size of 0 and nothing is written to p, unlike the other
				// encoders in this package, which store a replacement byte.
				if c > 65535 {
					return 0, INVALID_CHAR
				}
				jis := jis0208Table.FromUnicode[c]
				// A zero entry means "not present" unless c really is the
				// character stored at Data[0][0].
				if jis == [2]byte{0, 0} && c != rune(jis0208Table.Data[0][0]) {
					return 0, INVALID_CHAR
				}

				if encoding != jisX0208 {
					// Write only the escape sequence and report STATE_ONLY;
					// the character itself is produced by a later call with
					// the same rune.
					if len(p) < 3 {
						return 0, NO_ROOM
					}
					p[0], p[1], p[2] = esc, '$', 'B'
					encoding = jisX0208
					return 3, STATE_ONLY
				}

				p[0] = jis[0] + 0x21
				p[1] = jis[1] + 0x21
				return 2, SUCCESS
			}
		},
	})
}
-------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | var jis0201ToUnicode = [256]uint16{ 4 | 0x20: 0x0020, // SPACE 5 | 0x21: 0x0021, // EXCLAMATION MARK 6 | 0x22: 0x0022, // QUOTATION MARK 7 | 0x23: 0x0023, // NUMBER SIGN 8 | 0x24: 0x0024, // DOLLAR SIGN 9 | 0x25: 0x0025, // PERCENT SIGN 10 | 0x26: 0x0026, // AMPERSAND 11 | 0x27: 0x0027, // APOSTROPHE 12 | 0x28: 0x0028, // LEFT PARENTHESIS 13 | 0x29: 0x0029, // RIGHT PARENTHESIS 14 | 0x2A: 0x002A, // ASTERISK 15 | 0x2B: 0x002B, // PLUS SIGN 16 | 0x2C: 0x002C, // COMMA 17 | 0x2D: 0x002D, // HYPHEN-MINUS 18 | 0x2E: 0x002E, // FULL STOP 19 | 0x2F: 0x002F, // SOLIDUS 20 | 0x30: 0x0030, // DIGIT ZERO 21 | 0x31: 0x0031, // DIGIT ONE 22 | 0x32: 0x0032, // DIGIT TWO 23 | 0x33: 0x0033, // DIGIT THREE 24 | 0x34: 0x0034, // DIGIT FOUR 25 | 0x35: 0x0035, // DIGIT FIVE 26 | 0x36: 0x0036, // DIGIT SIX 27 | 0x37: 0x0037, // DIGIT SEVEN 28 | 0x38: 0x0038, // DIGIT EIGHT 29 | 0x39: 0x0039, // DIGIT NINE 30 | 0x3A: 0x003A, // COLON 31 | 0x3B: 0x003B, // SEMICOLON 32 | 0x3C: 0x003C, // LESS-THAN SIGN 33 | 0x3D: 0x003D, // EQUALS SIGN 34 | 0x3E: 0x003E, // GREATER-THAN SIGN 35 | 0x3F: 0x003F, // QUESTION MARK 36 | 0x40: 0x0040, // COMMERCIAL AT 37 | 0x41: 0x0041, // LATIN CAPITAL LETTER A 38 | 0x42: 0x0042, // LATIN CAPITAL LETTER B 39 | 0x43: 0x0043, // LATIN CAPITAL LETTER C 40 | 0x44: 0x0044, // LATIN CAPITAL LETTER D 41 | 0x45: 0x0045, // LATIN CAPITAL LETTER E 42 | 0x46: 0x0046, // LATIN CAPITAL LETTER F 43 | 0x47: 0x0047, // LATIN CAPITAL LETTER G 44 | 0x48: 0x0048, // LATIN CAPITAL LETTER H 45 | 0x49: 0x0049, // LATIN CAPITAL LETTER I 46 | 0x4A: 0x004A, // LATIN CAPITAL LETTER J 47 | 0x4B: 0x004B, // LATIN CAPITAL LETTER K 48 | 0x4C: 0x004C, // LATIN CAPITAL LETTER L 49 | 0x4D: 0x004D, // LATIN CAPITAL LETTER M 50 | 0x4E: 0x004E, // LATIN CAPITAL LETTER N 51 | 0x4F: 0x004F, // LATIN CAPITAL LETTER O 52 | 0x50: 0x0050, // LATIN CAPITAL LETTER P 53 | 0x51: 0x0051, // LATIN 
CAPITAL LETTER Q 54 | 0x52: 0x0052, // LATIN CAPITAL LETTER R 55 | 0x53: 0x0053, // LATIN CAPITAL LETTER S 56 | 0x54: 0x0054, // LATIN CAPITAL LETTER T 57 | 0x55: 0x0055, // LATIN CAPITAL LETTER U 58 | 0x56: 0x0056, // LATIN CAPITAL LETTER V 59 | 0x57: 0x0057, // LATIN CAPITAL LETTER W 60 | 0x58: 0x0058, // LATIN CAPITAL LETTER X 61 | 0x59: 0x0059, // LATIN CAPITAL LETTER Y 62 | 0x5A: 0x005A, // LATIN CAPITAL LETTER Z 63 | 0x5B: 0x005B, // LEFT SQUARE BRACKET 64 | 0x5C: 0x00A5, // YEN SIGN 65 | 0x5D: 0x005D, // RIGHT SQUARE BRACKET 66 | 0x5E: 0x005E, // CIRCUMFLEX ACCENT 67 | 0x5F: 0x005F, // LOW LINE 68 | 0x60: 0x0060, // GRAVE ACCENT 69 | 0x61: 0x0061, // LATIN SMALL LETTER A 70 | 0x62: 0x0062, // LATIN SMALL LETTER B 71 | 0x63: 0x0063, // LATIN SMALL LETTER C 72 | 0x64: 0x0064, // LATIN SMALL LETTER D 73 | 0x65: 0x0065, // LATIN SMALL LETTER E 74 | 0x66: 0x0066, // LATIN SMALL LETTER F 75 | 0x67: 0x0067, // LATIN SMALL LETTER G 76 | 0x68: 0x0068, // LATIN SMALL LETTER H 77 | 0x69: 0x0069, // LATIN SMALL LETTER I 78 | 0x6A: 0x006A, // LATIN SMALL LETTER J 79 | 0x6B: 0x006B, // LATIN SMALL LETTER K 80 | 0x6C: 0x006C, // LATIN SMALL LETTER L 81 | 0x6D: 0x006D, // LATIN SMALL LETTER M 82 | 0x6E: 0x006E, // LATIN SMALL LETTER N 83 | 0x6F: 0x006F, // LATIN SMALL LETTER O 84 | 0x70: 0x0070, // LATIN SMALL LETTER P 85 | 0x71: 0x0071, // LATIN SMALL LETTER Q 86 | 0x72: 0x0072, // LATIN SMALL LETTER R 87 | 0x73: 0x0073, // LATIN SMALL LETTER S 88 | 0x74: 0x0074, // LATIN SMALL LETTER T 89 | 0x75: 0x0075, // LATIN SMALL LETTER U 90 | 0x76: 0x0076, // LATIN SMALL LETTER V 91 | 0x77: 0x0077, // LATIN SMALL LETTER W 92 | 0x78: 0x0078, // LATIN SMALL LETTER X 93 | 0x79: 0x0079, // LATIN SMALL LETTER Y 94 | 0x7A: 0x007A, // LATIN SMALL LETTER Z 95 | 0x7B: 0x007B, // LEFT CURLY BRACKET 96 | 0x7C: 0x007C, // VERTICAL LINE 97 | 0x7D: 0x007D, // RIGHT CURLY BRACKET 98 | 0x7E: 0x203E, // OVERLINE 99 | 0xA1: 0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP 100 | 0xA2: 0xFF62, // HALFWIDTH 
LEFT CORNER BRACKET 101 | 0xA3: 0xFF63, // HALFWIDTH RIGHT CORNER BRACKET 102 | 0xA4: 0xFF64, // HALFWIDTH IDEOGRAPHIC COMMA 103 | 0xA5: 0xFF65, // HALFWIDTH KATAKANA MIDDLE DOT 104 | 0xA6: 0xFF66, // HALFWIDTH KATAKANA LETTER WO 105 | 0xA7: 0xFF67, // HALFWIDTH KATAKANA LETTER SMALL A 106 | 0xA8: 0xFF68, // HALFWIDTH KATAKANA LETTER SMALL I 107 | 0xA9: 0xFF69, // HALFWIDTH KATAKANA LETTER SMALL U 108 | 0xAA: 0xFF6A, // HALFWIDTH KATAKANA LETTER SMALL E 109 | 0xAB: 0xFF6B, // HALFWIDTH KATAKANA LETTER SMALL O 110 | 0xAC: 0xFF6C, // HALFWIDTH KATAKANA LETTER SMALL YA 111 | 0xAD: 0xFF6D, // HALFWIDTH KATAKANA LETTER SMALL YU 112 | 0xAE: 0xFF6E, // HALFWIDTH KATAKANA LETTER SMALL YO 113 | 0xAF: 0xFF6F, // HALFWIDTH KATAKANA LETTER SMALL TU 114 | 0xB0: 0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 115 | 0xB1: 0xFF71, // HALFWIDTH KATAKANA LETTER A 116 | 0xB2: 0xFF72, // HALFWIDTH KATAKANA LETTER I 117 | 0xB3: 0xFF73, // HALFWIDTH KATAKANA LETTER U 118 | 0xB4: 0xFF74, // HALFWIDTH KATAKANA LETTER E 119 | 0xB5: 0xFF75, // HALFWIDTH KATAKANA LETTER O 120 | 0xB6: 0xFF76, // HALFWIDTH KATAKANA LETTER KA 121 | 0xB7: 0xFF77, // HALFWIDTH KATAKANA LETTER KI 122 | 0xB8: 0xFF78, // HALFWIDTH KATAKANA LETTER KU 123 | 0xB9: 0xFF79, // HALFWIDTH KATAKANA LETTER KE 124 | 0xBA: 0xFF7A, // HALFWIDTH KATAKANA LETTER KO 125 | 0xBB: 0xFF7B, // HALFWIDTH KATAKANA LETTER SA 126 | 0xBC: 0xFF7C, // HALFWIDTH KATAKANA LETTER SI 127 | 0xBD: 0xFF7D, // HALFWIDTH KATAKANA LETTER SU 128 | 0xBE: 0xFF7E, // HALFWIDTH KATAKANA LETTER SE 129 | 0xBF: 0xFF7F, // HALFWIDTH KATAKANA LETTER SO 130 | 0xC0: 0xFF80, // HALFWIDTH KATAKANA LETTER TA 131 | 0xC1: 0xFF81, // HALFWIDTH KATAKANA LETTER TI 132 | 0xC2: 0xFF82, // HALFWIDTH KATAKANA LETTER TU 133 | 0xC3: 0xFF83, // HALFWIDTH KATAKANA LETTER TE 134 | 0xC4: 0xFF84, // HALFWIDTH KATAKANA LETTER TO 135 | 0xC5: 0xFF85, // HALFWIDTH KATAKANA LETTER NA 136 | 0xC6: 0xFF86, // HALFWIDTH KATAKANA LETTER NI 137 | 0xC7: 0xFF87, // HALFWIDTH KATAKANA 
LETTER NU 138 | 0xC8: 0xFF88, // HALFWIDTH KATAKANA LETTER NE 139 | 0xC9: 0xFF89, // HALFWIDTH KATAKANA LETTER NO 140 | 0xCA: 0xFF8A, // HALFWIDTH KATAKANA LETTER HA 141 | 0xCB: 0xFF8B, // HALFWIDTH KATAKANA LETTER HI 142 | 0xCC: 0xFF8C, // HALFWIDTH KATAKANA LETTER HU 143 | 0xCD: 0xFF8D, // HALFWIDTH KATAKANA LETTER HE 144 | 0xCE: 0xFF8E, // HALFWIDTH KATAKANA LETTER HO 145 | 0xCF: 0xFF8F, // HALFWIDTH KATAKANA LETTER MA 146 | 0xD0: 0xFF90, // HALFWIDTH KATAKANA LETTER MI 147 | 0xD1: 0xFF91, // HALFWIDTH KATAKANA LETTER MU 148 | 0xD2: 0xFF92, // HALFWIDTH KATAKANA LETTER ME 149 | 0xD3: 0xFF93, // HALFWIDTH KATAKANA LETTER MO 150 | 0xD4: 0xFF94, // HALFWIDTH KATAKANA LETTER YA 151 | 0xD5: 0xFF95, // HALFWIDTH KATAKANA LETTER YU 152 | 0xD6: 0xFF96, // HALFWIDTH KATAKANA LETTER YO 153 | 0xD7: 0xFF97, // HALFWIDTH KATAKANA LETTER RA 154 | 0xD8: 0xFF98, // HALFWIDTH KATAKANA LETTER RI 155 | 0xD9: 0xFF99, // HALFWIDTH KATAKANA LETTER RU 156 | 0xDA: 0xFF9A, // HALFWIDTH KATAKANA LETTER RE 157 | 0xDB: 0xFF9B, // HALFWIDTH KATAKANA LETTER RO 158 | 0xDC: 0xFF9C, // HALFWIDTH KATAKANA LETTER WA 159 | 0xDD: 0xFF9D, // HALFWIDTH KATAKANA LETTER N 160 | 0xDE: 0xFF9E, // HALFWIDTH KATAKANA VOICED SOUND MARK 161 | 0xDF: 0xFF9F, // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 162 | } 163 | -------------------------------------------------------------------------------- /kuten.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "sync" 5 | "unicode/utf8" 6 | ) 7 | 8 | // A kutenTable holds the data for a double-byte character set, arranged by ku 9 | // (区, zone) and ten (点, position). These can be converted to various actual 10 | // encoding schemes. 11 | type kutenTable struct { 12 | // Data[ku][ten] is the unicode value for the character at that zone and 13 | // position. 14 | Data [94][94]uint16 15 | 16 | // FromUnicode holds the ku and ten for each Unicode code point. 
17 | // It is not available until Reverse() has been called. 18 | FromUnicode [][2]byte 19 | 20 | // once is used to synchronize the generation of FromUnicode. 21 | once sync.Once 22 | } 23 | 24 | // Reverse generates FromUnicode. 25 | func (t *kutenTable) Reverse() { 26 | t.once.Do(func() { 27 | t.FromUnicode = make([][2]byte, 65536) 28 | for ku := range t.Data { 29 | for ten, unicode := range t.Data[ku] { 30 | t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)} 31 | } 32 | } 33 | }) 34 | } 35 | 36 | // DecodeLow decodes a character from an encoding that does not have the high 37 | // bit set. 38 | func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) { 39 | if len(p) < 2 { 40 | return 0, 0, NO_ROOM 41 | } 42 | ku := p[0] - 0x21 43 | ten := p[1] - 0x21 44 | if ku > 93 || ten > 93 { 45 | return utf8.RuneError, 1, INVALID_CHAR 46 | } 47 | u := t.Data[ku][ten] 48 | if u == 0 { 49 | return utf8.RuneError, 1, INVALID_CHAR 50 | } 51 | return rune(u), 2, SUCCESS 52 | } 53 | 54 | // DecodeHigh decodes a character from an encoding that has the high bit set. 55 | func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) { 56 | if len(p) < 2 { 57 | return 0, 0, NO_ROOM 58 | } 59 | ku := p[0] - 0xa1 60 | ten := p[1] - 0xa1 61 | if ku > 93 || ten > 93 { 62 | return utf8.RuneError, 1, INVALID_CHAR 63 | } 64 | u := t.Data[ku][ten] 65 | if u == 0 { 66 | return utf8.RuneError, 1, INVALID_CHAR 67 | } 68 | return rune(u), 2, SUCCESS 69 | } 70 | 71 | // EncodeHigh encodes a character in an encoding that has the high bit set. 72 | func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) { 73 | if len(p) < 2 { 74 | return 0, NO_ROOM 75 | } 76 | if c > 0xffff { 77 | p[0] = '?' 78 | return 1, INVALID_CHAR 79 | } 80 | kuten := t.FromUnicode[c] 81 | if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) { 82 | p[0] = '?' 
83 | return 1, INVALID_CHAR 84 | } 85 | p[0] = kuten[0] + 0xa1 86 | p[1] = kuten[1] + 0xa1 87 | return 2, SUCCESS 88 | } 89 | -------------------------------------------------------------------------------- /mahonia_test.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "testing" 7 | ) 8 | 9 | var nameTests = map[string]string{ 10 | "utf8": "utf8", 11 | "ISO 8859-1": "iso88591", 12 | "Big5": "big5", 13 | "": "", 14 | } 15 | 16 | func TestSimplifyName(t *testing.T) { 17 | for name, simple := range nameTests { 18 | if simple != simplifyName(name) { 19 | t.Errorf("%s came out as %s instead of as %s", name, simplifyName(name), simple) 20 | } 21 | } 22 | } 23 | 24 | var testData = []struct { 25 | utf8, other, otherEncoding string 26 | }{ 27 | {"Résumé", "Résumé", "utf8"}, 28 | {"Résumé", "R\xe9sum\xe9", "latin-1"}, 29 | {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"}, 30 | {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"}, 31 | {"これは漢字です。", "\xfe\xff0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16"}, 32 | {"𝄢𝄞𝄪𝄫", "\xfe\xff\xd8\x34\xdd\x22\xd8\x34\xdd\x1e\xd8\x34\xdd\x2a\xd8\x34\xdd\x2b", "UTF-16"}, 33 | {"Hello, world", "Hello, world", "ASCII"}, 34 | {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"}, 35 | {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"}, 36 | {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"}, 37 | {"latviešu", "latvie\xf0u", "ISO-8859-13"}, 38 | {"Seònaid", "Se\xf2naid", "ISO-8859-14"}, 39 | {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"}, 40 | {"românește", "rom\xe2ne\xbate", "ISO-8859-16"}, 41 | {"nutraĵo", "nutra\xbco", "ISO-8859-3"}, 42 | {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"}, 43 | {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"}, 44 | {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"}, 45 | {"Kağan", "Ka\xf0an", "ISO-8859-9"}, 46 | {"Résumé", "R\x8esum\x8e", 
"macintosh"}, 47 | {"Gdańsk", "Gda\xf1sk", "windows-1250"}, 48 | {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"}, 49 | {"Résumé", "R\xe9sum\xe9", "windows-1252"}, 50 | {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"}, 51 | {"Kağan", "Ka\xf0an", "windows-1254"}, 52 | {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"}, 53 | {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"}, 54 | {"latviešu", "latvie\xf0u", "windows-1257"}, 55 | {"Việt", "Vi\xea\xf2t", "windows-1258"}, 56 | {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"}, 57 | {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"}, 58 | {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"}, 59 | {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"}, 60 | {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"}, 61 | {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"}, 62 | {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"}, 63 | {"㧯", "\x82\x31\x89\x38", "gb18030"}, 64 | {"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"}, 65 | {"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"}, 66 | {"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"}, 67 | {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"}, 68 | {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "CP51932"}, 69 | {"Thông tin bạn đồng hànhỌ", "Th\xabng tin b\xb9n \xae\xe5ng h\xb5nhO\xe4", "TCVN3"}, 70 | {"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"}, 71 | {"네이트 | 즐거움의 시작, 슈파스(Spaβ) NATE", "\xb3\xd7\xc0\xcc\xc6\xae | \xc1\xf1\xb0\xc5\xbf\xf2\xc0\xc7 \xbd\xc3\xc0\xdb, \xbd\xb4\xc6\xc4\xbd\xba(Spa\xa5\xe2) NATE", "EUC-KR"}, 72 | } 73 | 74 | func 
TestDecode(t *testing.T) { 75 | for _, data := range testData { 76 | d := NewDecoder(data.otherEncoding) 77 | if d == nil { 78 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 79 | continue 80 | } 81 | 82 | str := d.ConvertString(data.other) 83 | 84 | if str != data.utf8 { 85 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 86 | } 87 | } 88 | } 89 | 90 | func TestDecodeTranslate(t *testing.T) { 91 | for _, data := range testData { 92 | d := NewDecoder(data.otherEncoding) 93 | if d == nil { 94 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 95 | continue 96 | } 97 | 98 | _, cdata, _ := d.Translate([]byte(data.other), true) 99 | str := string(cdata) 100 | 101 | if str != data.utf8 { 102 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 103 | } 104 | } 105 | } 106 | 107 | func TestEncode(t *testing.T) { 108 | for _, data := range testData { 109 | e := NewEncoder(data.otherEncoding) 110 | if e == nil { 111 | t.Errorf("Could not create encoder for %s", data.otherEncoding) 112 | continue 113 | } 114 | 115 | str := e.ConvertString(data.utf8) 116 | 117 | if str != data.other { 118 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other) 119 | } 120 | } 121 | } 122 | 123 | func TestReader(t *testing.T) { 124 | for _, data := range testData { 125 | d := NewDecoder(data.otherEncoding) 126 | if d == nil { 127 | t.Errorf("Could not create decoder for %s", data.otherEncoding) 128 | continue 129 | } 130 | 131 | b := bytes.NewBufferString(data.other) 132 | r := d.NewReader(b) 133 | result, _ := ioutil.ReadAll(r) 134 | str := string(result) 135 | 136 | if str != data.utf8 { 137 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 138 | } 139 | } 140 | } 141 | 142 | func TestWriter(t *testing.T) { 143 | for _, data := range testData { 144 | e := NewEncoder(data.otherEncoding) 145 | if e == nil { 146 | t.Errorf("Could not create encoder for %s", data.otherEncoding) 147 | continue 148 | } 
149 | 150 | b := new(bytes.Buffer) 151 | w := e.NewWriter(b) 152 | w.Write([]byte(data.utf8)) 153 | str := b.String() 154 | 155 | if str != data.other { 156 | t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other) 157 | } 158 | } 159 | } 160 | 161 | func TestFallback(t *testing.T) { 162 | mixed := "résum\xe9 " // The space is needed because of the issue mentioned in the Note: in fallback.go 163 | pure := "résumé " 164 | d := FallbackDecoder(NewDecoder("utf8"), NewDecoder("ISO-8859-1")) 165 | result := d.ConvertString(mixed) 166 | if result != pure { 167 | t.Errorf("Unexpected value: %#v (expected %#v)", result, pure) 168 | } 169 | } 170 | 171 | func TestEntities(t *testing.T) { 172 | escaped := "¬it; I'm ∉ I tell you‚ ≪⃒ " 173 | plain := "¬it; I'm ∉ I tell you\u201a \u226A\u20D2 " 174 | d := FallbackDecoder(EntityDecoder(), NewDecoder("ISO-8859-1")) 175 | result := d.ConvertString(escaped) 176 | if result != plain { 177 | t.Errorf("Unexpected value: %#v (expected %#v)", result, plain) 178 | } 179 | } 180 | 181 | func TestConvertStringOK(t *testing.T) { 182 | d := NewDecoder("ASCII") 183 | if d == nil { 184 | t.Fatal("Could not create decoder for ASCII") 185 | } 186 | 187 | str, ok := d.ConvertStringOK("hello") 188 | if !ok { 189 | t.Error("Spurious error found while decoding") 190 | } 191 | if str != "hello" { 192 | t.Errorf("expected %#v, got %#v", "hello", str) 193 | } 194 | 195 | str, ok = d.ConvertStringOK("\x80") 196 | if ok { 197 | t.Error(`Failed to detect error decoding "\x80"`) 198 | } 199 | 200 | e := NewEncoder("ISO-8859-3") 201 | if e == nil { 202 | t.Fatal("Could not create encoder for ISO-8859-1") 203 | } 204 | 205 | str, ok = e.ConvertStringOK("nutraĵo") 206 | if !ok { 207 | t.Error("spurious error while encoding") 208 | } 209 | if str != "nutra\xbco" { 210 | t.Errorf("expected %#v, got %#v", "nutra\xbco", str) 211 | } 212 | 213 | str, ok = e.ConvertStringOK("\x80abc") 214 | if ok { 215 | t.Error("failed to detect invalid UTF-8 while 
encoding") 216 | } 217 | 218 | str, ok = e.ConvertStringOK("русский") 219 | if ok { 220 | t.Error("failed to detect characters that couldn't be encoded") 221 | } 222 | } 223 | 224 | func TestBadCharset(t *testing.T) { 225 | d := NewDecoder("this is not a valid charset") 226 | if d != nil { 227 | t.Fatal("got a non-nil decoder for an invalid charset") 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /mahoniconv/mahoniconv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "github.com/henrylee2cn/mahonia" 6 | "io" 7 | "log" 8 | "os" 9 | ) 10 | 11 | // An iconv workalike using mahonia. 12 | 13 | var from = flag.String("f", "utf-8", "source character set") 14 | var to = flag.String("t", "utf-8", "destination character set") 15 | 16 | func main() { 17 | flag.Parse() 18 | 19 | var r io.Reader = os.Stdin 20 | var w io.Writer = os.Stdout 21 | 22 | if *from != "utf-8" { 23 | decode := mahonia.NewDecoder(*from) 24 | if decode == nil { 25 | log.Fatalf("Could not create decoder for %s", *from) 26 | } 27 | r = decode.NewReader(r) 28 | } 29 | 30 | if *to != "utf-8" { 31 | encode := mahonia.NewEncoder(*to) 32 | if encode == nil { 33 | log.Fatalf("Could not create decoder for %s", *to) 34 | } 35 | w = encode.NewWriter(w) 36 | } 37 | 38 | io.Copy(w, r) 39 | } 40 | -------------------------------------------------------------------------------- /mbcs.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Generic converters for multibyte character sets. 4 | 5 | // An mbcsTrie contains the data to convert from the character set to Unicode. 6 | // If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune 7 | // children either is nil or has 256 elements. 
8 | type mbcsTrie struct { 9 | // For leaf nodes, the Unicode character that is represented. 10 | char rune 11 | 12 | // For non-leaf nodes, the trie to decode the remainder of the character. 13 | children []mbcsTrie 14 | } 15 | 16 | // A MBCSTable holds the data to convert to and from Unicode. 17 | type MBCSTable struct { 18 | toUnicode mbcsTrie 19 | fromUnicode map[rune]string 20 | } 21 | 22 | // AddCharacter adds a character to the table. rune is its Unicode code point, 23 | // and bytes contains the bytes used to encode it in the character set. 24 | func (table *MBCSTable) AddCharacter(c rune, bytes string) { 25 | if table.fromUnicode == nil { 26 | table.fromUnicode = make(map[rune]string) 27 | } 28 | 29 | table.fromUnicode[c] = bytes 30 | 31 | trie := &table.toUnicode 32 | for i := 0; i < len(bytes); i++ { 33 | if trie.children == nil { 34 | trie.children = make([]mbcsTrie, 256) 35 | } 36 | 37 | b := bytes[i] 38 | trie = &trie.children[b] 39 | } 40 | 41 | trie.char = c 42 | } 43 | 44 | func (table *MBCSTable) Decoder() Decoder { 45 | return func(p []byte) (c rune, size int, status Status) { 46 | if len(p) == 0 { 47 | status = NO_ROOM 48 | return 49 | } 50 | 51 | if p[0] == 0 { 52 | return 0, 1, SUCCESS 53 | } 54 | 55 | trie := &table.toUnicode 56 | for trie.char == 0 { 57 | if trie.children == nil { 58 | return 0xfffd, 1, INVALID_CHAR 59 | } 60 | if len(p) < size+1 { 61 | return 0, 0, NO_ROOM 62 | } 63 | 64 | trie = &trie.children[p[size]] 65 | size++ 66 | } 67 | 68 | c = trie.char 69 | status = SUCCESS 70 | return 71 | } 72 | } 73 | 74 | func (table *MBCSTable) Encoder() Encoder { 75 | return func(p []byte, c rune) (size int, status Status) { 76 | bytes := table.fromUnicode[c] 77 | if bytes == "" { 78 | if len(p) > 0 { 79 | p[0] = '?' 
80 | return 1, INVALID_CHAR 81 | } else { 82 | return 0, NO_ROOM 83 | } 84 | } 85 | 86 | if len(p) < len(bytes) { 87 | return 0, NO_ROOM 88 | } 89 | 90 | return copy(p, bytes), SUCCESS 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // This file is based on bufio.Reader in the Go standard library, 4 | // which has the following copyright notice: 5 | 6 | // Copyright 2009 The Go Authors. All rights reserved. 7 | // Use of this source code is governed by a BSD-style 8 | // license that can be found in the LICENSE file. 9 | 10 | import ( 11 | "io" 12 | "unicode/utf8" 13 | ) 14 | 15 | const ( 16 | defaultBufSize = 4096 17 | ) 18 | 19 | // Reader implements character-set decoding for an io.Reader object. 20 | type Reader struct { 21 | buf []byte 22 | rd io.Reader 23 | decode Decoder 24 | r, w int 25 | err error 26 | } 27 | 28 | // NewReader creates a new Reader that uses the receiver to decode text. 29 | func (d Decoder) NewReader(rd io.Reader) *Reader { 30 | b := new(Reader) 31 | b.buf = make([]byte, defaultBufSize) 32 | b.rd = rd 33 | b.decode = d 34 | return b 35 | } 36 | 37 | // fill reads a new chunk into the buffer. 38 | func (b *Reader) fill() { 39 | // Slide existing data to beginning. 40 | if b.r > 0 { 41 | copy(b.buf, b.buf[b.r:b.w]) 42 | b.w -= b.r 43 | b.r = 0 44 | } 45 | 46 | // Read new data. 47 | n, e := b.rd.Read(b.buf[b.w:]) 48 | b.w += n 49 | if e != nil { 50 | b.err = e 51 | } 52 | } 53 | 54 | // Read reads data into p. 55 | // It returns the number of bytes read into p. 56 | // It calls Read at most once on the underlying Reader, 57 | // hence n may be less than len(p). 58 | // At EOF, the count will be zero and err will be os.EOF. 
59 | func (b *Reader) Read(p []byte) (n int, err error) { 60 | n = len(p) 61 | filled := false 62 | if n == 0 { 63 | return 0, b.err 64 | } 65 | if b.w == b.r { 66 | if b.err != nil { 67 | return 0, b.err 68 | } 69 | if n > len(b.buf) { 70 | // Large read, empty buffer. 71 | // Allocate a larger buffer for efficiency. 72 | b.buf = make([]byte, n) 73 | } 74 | b.fill() 75 | filled = true 76 | if b.w == b.r { 77 | return 0, b.err 78 | } 79 | } 80 | 81 | i := 0 82 | for i < n { 83 | rune, size, status := b.decode(b.buf[b.r:b.w]) 84 | 85 | if status == STATE_ONLY { 86 | b.r += size 87 | continue 88 | } 89 | 90 | if status == NO_ROOM { 91 | if b.err != nil { 92 | rune = 0xfffd 93 | size = b.w - b.r 94 | if size == 0 { 95 | break 96 | } 97 | status = INVALID_CHAR 98 | } else if filled { 99 | break 100 | } else { 101 | b.fill() 102 | filled = true 103 | continue 104 | } 105 | } 106 | 107 | if i+utf8.RuneLen(rune) > n { 108 | break 109 | } 110 | 111 | b.r += size 112 | if rune < 128 { 113 | p[i] = byte(rune) 114 | i++ 115 | } else { 116 | i += utf8.EncodeRune(p[i:], rune) 117 | } 118 | } 119 | 120 | return i, nil 121 | } 122 | 123 | // ReadRune reads a single Unicode character and returns the 124 | // rune and its size in bytes. 
125 | func (b *Reader) ReadRune() (c rune, size int, err error) { 126 | read: 127 | c, size, status := b.decode(b.buf[b.r:b.w]) 128 | 129 | if status == NO_ROOM && b.err == nil { 130 | b.fill() 131 | goto read 132 | } 133 | 134 | if status == STATE_ONLY { 135 | b.r += size 136 | goto read 137 | } 138 | 139 | if b.r == b.w { 140 | return 0, 0, b.err 141 | } 142 | 143 | if status == NO_ROOM { 144 | c = 0xfffd 145 | size = b.w - b.r 146 | status = INVALID_CHAR 147 | } 148 | 149 | b.r += size 150 | return c, size, nil 151 | } 152 | -------------------------------------------------------------------------------- /shiftjis.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for the Shift-JIS encoding. 4 | 5 | import ( 6 | "unicode/utf8" 7 | ) 8 | 9 | func init() { 10 | RegisterCharset(&Charset{ 11 | Name: "Shift_JIS", 12 | Aliases: []string{"MS_Kanji", "csShiftJIS", "SJIS", "ibm-943", "windows-31j", "cp932", "windows-932"}, 13 | NewDecoder: func() Decoder { 14 | return decodeSJIS 15 | }, 16 | NewEncoder: func() Encoder { 17 | shiftJISOnce.Do(reverseShiftJISTable) 18 | return encodeSJIS 19 | }, 20 | }) 21 | } 22 | 23 | func decodeSJIS(p []byte) (c rune, size int, status Status) { 24 | if len(p) == 0 { 25 | return 0, 0, NO_ROOM 26 | } 27 | 28 | b := p[0] 29 | if b < 0x80 { 30 | return rune(b), 1, SUCCESS 31 | } 32 | 33 | if 0xa1 <= b && b <= 0xdf { 34 | return rune(b) + (0xff61 - 0xa1), 1, SUCCESS 35 | } 36 | 37 | if b == 0x80 || b == 0xa0 { 38 | return utf8.RuneError, 1, INVALID_CHAR 39 | } 40 | 41 | if len(p) < 2 { 42 | return 0, 0, NO_ROOM 43 | } 44 | 45 | jis := int(b)<<8 + int(p[1]) 46 | c = rune(shiftJISToUnicode[jis]) 47 | 48 | if c == 0 { 49 | return utf8.RuneError, 2, INVALID_CHAR 50 | } 51 | return c, 2, SUCCESS 52 | } 53 | 54 | func encodeSJIS(p []byte, c rune) (size int, status Status) { 55 | if len(p) == 0 { 56 | return 0, NO_ROOM 57 | } 58 | 59 | if c < 0x80 { 60 | p[0] = byte(c) 61 | 
return 1, SUCCESS 62 | } 63 | 64 | if 0xff61 <= c && c <= 0xff9f { 65 | // half-width katakana 66 | p[0] = byte(c - (0xff61 - 0xa1)) 67 | return 1, SUCCESS 68 | } 69 | 70 | if len(p) < 2 { 71 | return 0, NO_ROOM 72 | } 73 | 74 | if c > 0xffff { 75 | p[0] = '?' 76 | return 1, INVALID_CHAR 77 | } 78 | 79 | jis := unicodeToShiftJIS[c] 80 | if jis == 0 { 81 | p[0] = '?' 82 | return 1, INVALID_CHAR 83 | } 84 | 85 | p[0] = byte(jis >> 8) 86 | p[1] = byte(jis) 87 | return 2, SUCCESS 88 | } 89 | -------------------------------------------------------------------------------- /tcvn3.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | // Converters for TCVN3 encoding. 4 | 5 | import ( 6 | "sync" 7 | ) 8 | 9 | var ( 10 | onceTCVN3 sync.Once 11 | dataTCVN3 = struct { 12 | UnicodeToWord map[rune][2]byte 13 | WordToUnicode [256]struct { 14 | r rune 15 | m *[256]rune 16 | } 17 | }{} 18 | ) 19 | 20 | func init() { 21 | p := new(Charset) 22 | p.Name = "TCVN3" 23 | p.NewDecoder = func() Decoder { 24 | onceTCVN3.Do(buildTCVN3Tables) 25 | return decodeTCVN3 26 | } 27 | p.NewEncoder = func() Encoder { 28 | onceTCVN3.Do(buildTCVN3Tables) 29 | return encodeTCVN3 30 | } 31 | RegisterCharset(p) 32 | } 33 | 34 | func decodeTCVN3(p []byte) (rune, int, Status) { 35 | if len(p) == 0 { 36 | return 0, 0, NO_ROOM 37 | } 38 | item := &dataTCVN3.WordToUnicode[p[0]] 39 | if item.m != nil && len(p) > 1 { 40 | if r := item.m[p[1]]; r != 0 { 41 | return r, 2, SUCCESS 42 | } 43 | } 44 | if item.r != 0 { 45 | return item.r, 1, SUCCESS 46 | } 47 | if p[0] < 0x80 { 48 | return rune(p[0]), 1, SUCCESS 49 | } 50 | return '?', 1, INVALID_CHAR 51 | } 52 | 53 | func encodeTCVN3(p []byte, c rune) (int, Status) { 54 | if len(p) == 0 { 55 | return 0, NO_ROOM 56 | } 57 | if c < rune(0x80) { 58 | p[0] = byte(c) 59 | return 1, SUCCESS 60 | } 61 | if v, ok := dataTCVN3.UnicodeToWord[c]; ok { 62 | if v[1] != 0 { 63 | if len(p) < 2 { 64 | return 0, NO_ROOM 65 | 
} 66 | p[0] = v[0] 67 | p[1] = v[1] 68 | return 2, SUCCESS 69 | } else { 70 | p[0] = v[0] 71 | return 1, SUCCESS 72 | } 73 | } 74 | p[0] = '?' 75 | return 1, INVALID_CHAR 76 | } 77 | 78 | func buildTCVN3Tables() { 79 | dataTCVN3.UnicodeToWord = map[rune][2]byte{ 80 | // one byte 81 | 0x00C2: {0xA2, 0x00}, 82 | 0x00CA: {0xA3, 0x00}, 83 | 0x00D4: {0xA4, 0x00}, 84 | 0x00E0: {0xB5, 0x00}, 85 | 0x00E1: {0xB8, 0x00}, 86 | 0x00E2: {0xA9, 0x00}, 87 | 0x00E3: {0xB7, 0x00}, 88 | 0x00E8: {0xCC, 0x00}, 89 | 0x00E9: {0xD0, 0x00}, 90 | 0x00EA: {0xAA, 0x00}, 91 | 0x00EC: {0xD7, 0x00}, 92 | 0x00ED: {0xDD, 0x00}, 93 | 0x00F2: {0xDF, 0x00}, 94 | 0x00F3: {0xE3, 0x00}, 95 | 0x00F4: {0xAB, 0x00}, 96 | 0x00F5: {0xE2, 0x00}, 97 | 0x00F9: {0xEF, 0x00}, 98 | 0x00FA: {0xF3, 0x00}, 99 | 0x00FD: {0xFD, 0x00}, 100 | 0x0102: {0xA1, 0x00}, 101 | 0x0103: {0xA8, 0x00}, 102 | 0x0110: {0xA7, 0x00}, 103 | 0x0111: {0xAE, 0x00}, 104 | 0x0129: {0xDC, 0x00}, 105 | 0x0169: {0xF2, 0x00}, 106 | 0x01A0: {0xA5, 0x00}, 107 | 0x01A1: {0xAC, 0x00}, 108 | 0x01AF: {0xA6, 0x00}, 109 | 0x01B0: {0xAD, 0x00}, 110 | 0x1EA1: {0xB9, 0x00}, 111 | 0x1EA3: {0xB6, 0x00}, 112 | 0x1EA5: {0xCA, 0x00}, 113 | 0x1EA7: {0xC7, 0x00}, 114 | 0x1EA9: {0xC8, 0x00}, 115 | 0x1EAB: {0xC9, 0x00}, 116 | 0x1EAD: {0xCB, 0x00}, 117 | 0x1EAF: {0xBE, 0x00}, 118 | 0x1EB1: {0xBB, 0x00}, 119 | 0x1EB3: {0xBC, 0x00}, 120 | 0x1EB5: {0xBD, 0x00}, 121 | 0x1EB7: {0xC6, 0x00}, 122 | 0x1EB9: {0xD1, 0x00}, 123 | 0x1EBB: {0xCE, 0x00}, 124 | 0x1EBD: {0xCF, 0x00}, 125 | 0x1EBF: {0xD5, 0x00}, 126 | 0x1EC1: {0xD2, 0x00}, 127 | 0x1EC3: {0xD3, 0x00}, 128 | 0x1EC5: {0xD4, 0x00}, 129 | 0x1EC7: {0xD6, 0x00}, 130 | 0x1EC9: {0xD8, 0x00}, 131 | 0x1ECB: {0xDE, 0x00}, 132 | 0x1ECD: {0xE4, 0x00}, 133 | 0x1ECF: {0xE1, 0x00}, 134 | 0x1ED1: {0xE8, 0x00}, 135 | 0x1ED3: {0xE5, 0x00}, 136 | 0x1ED5: {0xE6, 0x00}, 137 | 0x1ED7: {0xE7, 0x00}, 138 | 0x1ED9: {0xE9, 0x00}, 139 | 0x1EDB: {0xED, 0x00}, 140 | 0x1EDD: {0xEA, 0x00}, 141 | 0x1EDF: {0xEB, 0x00}, 142 | 0x1EE1: {0xEC, 0x00}, 
143 | 0x1EE3: {0xEE, 0x00}, 144 | 0x1EE5: {0xF4, 0x00}, 145 | 0x1EE7: {0xF1, 0x00}, 146 | 0x1EE9: {0xF8, 0x00}, 147 | 0x1EEB: {0xF5, 0x00}, 148 | 0x1EED: {0xF6, 0x00}, 149 | 0x1EEF: {0xF7, 0x00}, 150 | 0x1EF1: {0xF9, 0x00}, 151 | 0x1EF3: {0xFA, 0x00}, 152 | 0x1EF5: {0xFE, 0x00}, 153 | 0x1EF7: {0xFB, 0x00}, 154 | 0x1EF9: {0xFC, 0x00}, 155 | // two bytes 156 | 0x00C0: {0x41, 0xB5}, 157 | 0x00C1: {0x41, 0xB8}, 158 | 0x00C3: {0x41, 0xB7}, 159 | 0x00C8: {0x45, 0xCC}, 160 | 0x00C9: {0x45, 0xD0}, 161 | 0x00CC: {0x49, 0xD7}, 162 | 0x00CD: {0x49, 0xDD}, 163 | 0x00D2: {0x4F, 0xDF}, 164 | 0x00D3: {0x4F, 0xE3}, 165 | 0x00D5: {0x4F, 0xE2}, 166 | 0x00D9: {0x55, 0xEF}, 167 | 0x00DA: {0x55, 0xF3}, 168 | 0x00DD: {0x59, 0xFD}, 169 | 0x0128: {0x49, 0xDC}, 170 | 0x0168: {0x55, 0xF2}, 171 | 0x1EA0: {0x41, 0xB9}, 172 | 0x1EA2: {0x41, 0xB6}, 173 | 0x1EA4: {0xA2, 0xCA}, 174 | 0x1EA6: {0xA2, 0xC7}, 175 | 0x1EA8: {0xA2, 0xC8}, 176 | 0x1EAA: {0xA2, 0xC9}, 177 | 0x1EAC: {0xA2, 0xCB}, 178 | 0x1EAE: {0xA1, 0xBE}, 179 | 0x1EB0: {0xA1, 0xBB}, 180 | 0x1EB2: {0xA1, 0xBC}, 181 | 0x1EB4: {0xA1, 0xBD}, 182 | 0x1EB6: {0xA1, 0xC6}, 183 | 0x1EB8: {0x45, 0xD1}, 184 | 0x1EBA: {0x45, 0xCE}, 185 | 0x1EBC: {0x45, 0xCF}, 186 | 0x1EBE: {0xA3, 0xD5}, 187 | 0x1EC0: {0xA3, 0xD2}, 188 | 0x1EC2: {0xA3, 0xD3}, 189 | 0x1EC4: {0xA3, 0xD4}, 190 | 0x1EC6: {0xA3, 0xD6}, 191 | 0x1EC8: {0x49, 0xD8}, 192 | 0x1ECA: {0x49, 0xDE}, 193 | 0x1ECC: {0x4F, 0xE4}, 194 | 0x1ECE: {0x4F, 0xE1}, 195 | 0x1ED0: {0xA4, 0xE8}, 196 | 0x1ED2: {0xA4, 0xE5}, 197 | 0x1ED4: {0xA4, 0xE6}, 198 | 0x1ED6: {0xA4, 0xE7}, 199 | 0x1ED8: {0xA4, 0xE9}, 200 | 0x1EDA: {0xA5, 0xED}, 201 | 0x1EDC: {0xA5, 0xEA}, 202 | 0x1EDE: {0xA5, 0xEB}, 203 | 0x1EE0: {0xA5, 0xEC}, 204 | 0x1EE2: {0xA5, 0xEE}, 205 | 0x1EE4: {0x55, 0xF4}, 206 | 0x1EE6: {0x55, 0xF1}, 207 | 0x1EE8: {0xA6, 0xF8}, 208 | 0x1EEA: {0xA6, 0xF5}, 209 | 0x1EEC: {0xA6, 0xF6}, 210 | 0x1EEE: {0xA6, 0xF7}, 211 | 0x1EF0: {0xA6, 0xF9}, 212 | 0x1EF2: {0x59, 0xFA}, 213 | 0x1EF4: {0x59, 0xFE}, 214 | 0x1EF6: {0x59, 
0xFB}, 215 | 0x1EF8: {0x59, 0xFC}, 216 | } 217 | for r, b := range dataTCVN3.UnicodeToWord { 218 | item := &dataTCVN3.WordToUnicode[b[0]] 219 | if b[1] == 0 { 220 | item.r = r 221 | } else { 222 | if item.m == nil { 223 | item.m = new([256]rune) 224 | } 225 | item.m[b[1]] = r 226 | } 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /translate.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import "unicode/utf8" 4 | 5 | // Translate enables a Decoder to implement go-charset's Translator interface. 6 | func (d Decoder) Translate(data []byte, eof bool) (n int, cdata []byte, err error) { 7 | cdata = make([]byte, len(data)+1) 8 | destPos := 0 9 | 10 | for n < len(data) { 11 | rune, size, status := d(data[n:]) 12 | 13 | switch status { 14 | case STATE_ONLY: 15 | n += size 16 | continue 17 | 18 | case NO_ROOM: 19 | if !eof { 20 | return n, cdata[:destPos], nil 21 | } 22 | rune = 0xfffd 23 | n = len(data) 24 | 25 | default: 26 | n += size 27 | } 28 | 29 | if rune < 128 { 30 | if destPos >= len(cdata) { 31 | cdata = doubleLength(cdata) 32 | } 33 | cdata[destPos] = byte(rune) 34 | destPos++ 35 | } else { 36 | if destPos+utf8.RuneLen(rune) > len(cdata) { 37 | cdata = doubleLength(cdata) 38 | } 39 | destPos += utf8.EncodeRune(cdata[destPos:], rune) 40 | } 41 | } 42 | 43 | return n, cdata[:destPos], nil 44 | } 45 | 46 | func doubleLength(b []byte) []byte { 47 | b2 := make([]byte, 2*len(b)) 48 | copy(b2, b) 49 | return b2 50 | } 51 | -------------------------------------------------------------------------------- /utf16.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "unicode/utf16" 5 | ) 6 | 7 | func init() { 8 | for i := 0; i < len(utf16Charsets); i++ { 9 | RegisterCharset(&utf16Charsets[i]) 10 | } 11 | } 12 | 13 | var utf16Charsets = []Charset{ 14 | { 15 | Name: "UTF-16", 16 | 
NewDecoder: func() Decoder { 17 | var decodeRune Decoder 18 | return func(p []byte) (c rune, size int, status Status) { 19 | if decodeRune == nil { 20 | // haven't read the BOM yet 21 | if len(p) < 2 { 22 | status = NO_ROOM 23 | return 24 | } 25 | 26 | switch { 27 | case p[0] == 0xfe && p[1] == 0xff: 28 | decodeRune = decodeUTF16beRune 29 | return 0, 2, STATE_ONLY 30 | case p[0] == 0xff && p[1] == 0xfe: 31 | decodeRune = decodeUTF16leRune 32 | return 0, 2, STATE_ONLY 33 | default: 34 | decodeRune = decodeUTF16beRune 35 | } 36 | } 37 | 38 | return decodeRune(p) 39 | } 40 | }, 41 | NewEncoder: func() Encoder { 42 | wroteBOM := false 43 | return func(p []byte, c rune) (size int, status Status) { 44 | if !wroteBOM { 45 | if len(p) < 2 { 46 | status = NO_ROOM 47 | return 48 | } 49 | 50 | p[0] = 0xfe 51 | p[1] = 0xff 52 | wroteBOM = true 53 | return 2, STATE_ONLY 54 | } 55 | 56 | return encodeUTF16beRune(p, c) 57 | } 58 | }, 59 | }, 60 | { 61 | Name: "UTF-16BE", 62 | NewDecoder: func() Decoder { return decodeUTF16beRune }, 63 | NewEncoder: func() Encoder { return encodeUTF16beRune }, 64 | }, 65 | { 66 | Name: "UTF-16LE", 67 | NewDecoder: func() Decoder { return decodeUTF16leRune }, 68 | NewEncoder: func() Encoder { return encodeUTF16leRune }, 69 | }, 70 | } 71 | 72 | func decodeUTF16beRune(p []byte) (r rune, size int, status Status) { 73 | if len(p) < 2 { 74 | status = NO_ROOM 75 | return 76 | } 77 | 78 | c := rune(p[0])<<8 + rune(p[1]) 79 | 80 | if utf16.IsSurrogate(c) { 81 | if len(p) < 4 { 82 | status = NO_ROOM 83 | return 84 | } 85 | 86 | c2 := rune(p[2])<<8 + rune(p[3]) 87 | c = utf16.DecodeRune(c, c2) 88 | 89 | if c == 0xfffd { 90 | return c, 2, INVALID_CHAR 91 | } else { 92 | return c, 4, SUCCESS 93 | } 94 | } 95 | 96 | return c, 2, SUCCESS 97 | } 98 | 99 | func encodeUTF16beRune(p []byte, c rune) (size int, status Status) { 100 | if c < 0x10000 { 101 | if len(p) < 2 { 102 | status = NO_ROOM 103 | return 104 | } 105 | p[0] = byte(c >> 8) 106 | p[1] = byte(c) 107 | 
return 2, SUCCESS 108 | } 109 | 110 | if len(p) < 4 { 111 | status = NO_ROOM 112 | return 113 | } 114 | s1, s2 := utf16.EncodeRune(c) 115 | p[0] = byte(s1 >> 8) 116 | p[1] = byte(s1) 117 | p[2] = byte(s2 >> 8) 118 | p[3] = byte(s2) 119 | return 4, SUCCESS 120 | } 121 | 122 | func decodeUTF16leRune(p []byte) (r rune, size int, status Status) { 123 | if len(p) < 2 { 124 | status = NO_ROOM 125 | return 126 | } 127 | 128 | c := rune(p[1])<<8 + rune(p[0]) 129 | 130 | if utf16.IsSurrogate(c) { 131 | if len(p) < 4 { 132 | status = NO_ROOM 133 | return 134 | } 135 | 136 | c2 := rune(p[3])<<8 + rune(p[2]) 137 | c = utf16.DecodeRune(c, c2) 138 | 139 | if c == 0xfffd { 140 | return c, 2, INVALID_CHAR 141 | } else { 142 | return c, 4, SUCCESS 143 | } 144 | } 145 | 146 | return c, 2, SUCCESS 147 | } 148 | 149 | func encodeUTF16leRune(p []byte, c rune) (size int, status Status) { 150 | if c < 0x10000 { 151 | if len(p) < 2 { 152 | status = NO_ROOM 153 | return 154 | } 155 | p[1] = byte(c >> 8) 156 | p[0] = byte(c) 157 | return 2, SUCCESS 158 | } 159 | 160 | if len(p) < 4 { 161 | status = NO_ROOM 162 | return 163 | } 164 | s1, s2 := utf16.EncodeRune(c) 165 | p[1] = byte(s1 >> 8) 166 | p[0] = byte(s1) 167 | p[3] = byte(s2 >> 8) 168 | p[2] = byte(s2) 169 | return 4, SUCCESS 170 | } 171 | -------------------------------------------------------------------------------- /utf8.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import "unicode/utf8" 4 | 5 | func init() { 6 | RegisterCharset(&Charset{ 7 | Name: "UTF-8", 8 | NewDecoder: func() Decoder { return decodeUTF8Rune }, 9 | NewEncoder: func() Encoder { return encodeUTF8Rune }, 10 | }) 11 | } 12 | 13 | func decodeUTF8Rune(p []byte) (c rune, size int, status Status) { 14 | if len(p) == 0 { 15 | status = NO_ROOM 16 | return 17 | } 18 | 19 | if p[0] < 128 { 20 | return rune(p[0]), 1, SUCCESS 21 | } 22 | 23 | c, size = utf8.DecodeRune(p) 24 | 25 | if c == 0xfffd { 26 | if 
utf8.FullRune(p) { 27 | status = INVALID_CHAR 28 | return 29 | } 30 | 31 | return 0, 0, NO_ROOM 32 | } 33 | 34 | status = SUCCESS 35 | return 36 | } 37 | 38 | func encodeUTF8Rune(p []byte, c rune) (size int, status Status) { 39 | size = utf8.RuneLen(c) 40 | if size > len(p) { 41 | return 0, NO_ROOM 42 | } 43 | 44 | return utf8.EncodeRune(p, c), SUCCESS 45 | } 46 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package mahonia 2 | 3 | import ( 4 | "io" 5 | "unicode/utf8" 6 | ) 7 | 8 | // Writer implements character-set encoding for an io.Writer object. 9 | type Writer struct { 10 | wr io.Writer 11 | encode Encoder 12 | inbuf []byte 13 | outbuf []byte 14 | } 15 | 16 | // NewWriter creates a new Writer that uses the receiver to encode text. 17 | func (e Encoder) NewWriter(wr io.Writer) *Writer { 18 | w := new(Writer) 19 | w.wr = wr 20 | w.encode = e 21 | return w 22 | } 23 | 24 | // Write encodes and writes the data from p. 25 | func (w *Writer) Write(p []byte) (n int, err error) { 26 | n = len(p) 27 | 28 | if len(w.inbuf) > 0 { 29 | w.inbuf = append(w.inbuf, p...) 30 | p = w.inbuf 31 | } 32 | 33 | if len(w.outbuf) < len(p) { 34 | w.outbuf = make([]byte, len(p)+10) 35 | } 36 | 37 | outpos := 0 38 | 39 | for len(p) > 0 { 40 | rune, size := utf8.DecodeRune(p) 41 | if rune == 0xfffd && !utf8.FullRune(p) { 42 | break 43 | } 44 | 45 | p = p[size:] 46 | 47 | retry: 48 | size, status := w.encode(w.outbuf[outpos:], rune) 49 | 50 | if status == NO_ROOM { 51 | newDest := make([]byte, len(w.outbuf)*2) 52 | copy(newDest, w.outbuf) 53 | w.outbuf = newDest 54 | goto retry 55 | } 56 | 57 | if status == STATE_ONLY { 58 | outpos += size 59 | goto retry 60 | } 61 | 62 | outpos += size 63 | } 64 | 65 | w.inbuf = w.inbuf[:0] 66 | if len(p) > 0 { 67 | w.inbuf = append(w.inbuf, p...) 
68 | } 69 | 70 | n1, err := w.wr.Write(w.outbuf[0:outpos]) 71 | 72 | if err != nil && n1 < n { 73 | n = n1 74 | } 75 | 76 | return 77 | } 78 | 79 | func (w *Writer) WriteRune(c rune) (size int, err error) { 80 | if len(w.inbuf) > 0 { 81 | // There are leftover bytes, a partial UTF-8 sequence. 82 | w.inbuf = w.inbuf[:0] 83 | w.WriteRune(0xfffd) 84 | } 85 | 86 | if w.outbuf == nil { 87 | w.outbuf = make([]byte, 16) 88 | } 89 | 90 | outpos := 0 91 | 92 | retry: 93 | size, status := w.encode(w.outbuf[outpos:], c) 94 | 95 | if status == NO_ROOM { 96 | w.outbuf = make([]byte, len(w.outbuf)*2) 97 | goto retry 98 | } 99 | 100 | if status == STATE_ONLY { 101 | outpos += size 102 | goto retry 103 | } 104 | 105 | outpos += size 106 | 107 | return w.wr.Write(w.outbuf[0:outpos]) 108 | } 109 | --------------------------------------------------------------------------------