├── .gitignore ├── Makefile ├── README ├── config.ld ├── css-parser.lua ├── css.lua ├── data ├── entities.json └── jsontolua.lua ├── dom-sample.lua ├── encodings ├── index-ibm866.txt ├── index-iso-8859-10.txt ├── index-iso-8859-13.txt ├── index-iso-8859-14.txt ├── index-iso-8859-15.txt ├── index-iso-8859-16.txt ├── index-iso-8859-2.txt ├── index-iso-8859-3.txt ├── index-iso-8859-4.txt ├── index-iso-8859-5.txt ├── index-iso-8859-6.txt ├── index-iso-8859-7.txt ├── index-iso-8859-8.txt ├── index-koi8-r.txt ├── index-koi8-u.txt ├── index-macintosh.txt ├── index-windows-1250.txt ├── index-windows-1251.txt ├── index-windows-1252.txt ├── index-windows-1253.txt ├── index-windows-1254.txt ├── index-windows-1255.txt ├── index-windows-1256.txt ├── index-windows-1257.txt ├── index-windows-1258.txt ├── index-windows-874.txt ├── index-x-mac-cyrillic.txt └── make_encodings.lua ├── examples ├── simple.lua └── xmltotex.lua ├── ldoc-latex.lua ├── lexer.lua ├── luaxml-cssquery.lua ├── luaxml-domobject.lua ├── luaxml-encodings.lua ├── luaxml-entities.lua ├── luaxml-htmltemplates.lua ├── luaxml-mod-handler.lua ├── luaxml-mod-html.lua ├── luaxml-mod-xml.lua ├── luaxml-namedentities.lua ├── luaxml-parse-query.lua ├── luaxml-pretty.lua ├── luaxml-stack.lua ├── luaxml-sty.lua ├── luaxml-testxml.lua ├── luaxml-transform.lua ├── luaxml.pdf ├── luaxml.sty ├── luaxml.tex ├── rockspecs └── mhluaxml-dev-1.rockspec └── test ├── cssquery-test.lua ├── dom-test.lua ├── entities-test.lua ├── html-test.lua └── transform-test.lua /.gitignore: -------------------------------------------------------------------------------- 1 | *.aux 2 | *.fls 3 | *.toc 4 | *.swp 5 | *.out 6 | *.log 7 | *.fdb_latexmk 8 | build 9 | doc 10 | tags 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | lua_content = $(wildcard luaxml-*.lua) 2 | tex_content = $(wildcard *.tex) 3 | sty_content = 
$(wildcard *.sty) 4 | tests = $(wildcard test/*.lua) 5 | 6 | name = luaxml 7 | VERSION:= $(shell git --no-pager describe --abbrev=0 --tags --always ) 8 | DATE := $(firstword $(shell git --no-pager show --date=short --format="%ad" --name-only)) 9 | doc_file = luaxml.pdf 10 | TEXMFHOME = $(shell kpsewhich -var-value=TEXMFHOME) 11 | INSTALL_DIR = $(TEXMFHOME)/scripts/lua/$(name) 12 | MANUAL_DIR = $(TEXMFHOME)/doc/latex/$(name) 13 | STY_DIR = $(TEXMFHOME)/tex/latex/$(name) 14 | SYSTEM_BIN = /usr/local/bin 15 | BUILD_DIR = build 16 | BUILD_LUAXML = $(BUILD_DIR)/$(name) 17 | API_DOC = doc/api.tex 18 | API_SOURCES = luaxml-domobject.lua luaxml-cssquery.lua luaxml-transform.lua luaxml-mod-html.lua luaxml-encodings.lua luaxml-sty.lua 19 | LDOC_FILTER = ldoc-latex.lua 20 | LDOC = ldoc --all --filter ldoc-latex.filter 21 | ENTITIES_SOURCE = data/entities.json 22 | ENTITIES_MODULE = luaxml-namedentities.lua 23 | 24 | all: doc $(ENTITIES_MODULE) 25 | 26 | .PHONY: test $(ENTITIES_MODULE) 27 | 28 | 29 | 30 | doc: api $(doc_file) 31 | 32 | 33 | $(doc_file): $(name).tex $(API_DOC) $(ENTITIES_MODULE) 34 | latexmk -pdf -pdflatex='lualatex "\def\version{${VERSION}}\def\gitdate{${DATE}}\input{%S}"' $(name).tex 35 | 36 | api: $(API_DOC) 37 | 38 | $(API_DOC): $(API_SOURCES) $(LDOC_FILTER) 39 | mkdir -p doc 40 | $(LDOC) luaxml-domobject.lua > $(API_DOC) 41 | $(LDOC) luaxml-cssquery.lua >> $(API_DOC) 42 | $(LDOC) luaxml-transform.lua >> $(API_DOC) 43 | $(LDOC) luaxml-mod-html.lua >> $(API_DOC) 44 | $(LDOC) luaxml-encodings.lua >> $(API_DOC) 45 | $(LDOC) luaxml-sty.lua >> $(API_DOC) 46 | 47 | $(ENTITIES_MODULE): 48 | lua data/jsontolua.lua < $(ENTITIES_SOURCE) > $(ENTITIES_MODULE) 49 | 50 | test: 51 | texlua test/dom-test.lua 52 | texlua test/cssquery-test.lua 53 | texlua test/entities-test.lua 54 | texlua test/transform-test.lua 55 | texlua test/html-test.lua 56 | 57 | build: $(ENTITIES_MODULE) doc test $(lua_content) 58 | @rm -rf build 59 | @mkdir -p $(BUILD_LUAXML) 60 | @cp 
$(lua_content) $(tex_content) $(doc_file) $(ENTITIES_MODULE) $(BUILD_LUAXML) 61 | @cat README | sed -e "s/{{VERSION}}/${VERSION}/" | sed -e "s/{{DATE}}/${DATE}/" > $(BUILD_LUAXML)/README 62 | @cat luaxml.tex | sed -e "s/{{VERSION}}/${VERSION}/" > $(BUILD_LUAXML)/luaxml.tex 63 | @cat luaxml.sty | sed -e "s/{{VERSION}}/${VERSION}/" | sed -e "s/{{DATE}}/${DATE}/" > $(BUILD_LUAXML)/luaxml.sty 64 | @cd $(BUILD_DIR) && zip -r luaxml.zip luaxml 65 | 66 | install: doc $(lua_content) $(filters) 67 | mkdir -p $(INSTALL_DIR) 68 | mkdir -p $(MANUAL_DIR) 69 | mkdir -p $(STY_DIR) 70 | cp $(doc_file) $(MANUAL_DIR) 71 | cp $(lua_content) $(INSTALL_DIR) 72 | cp $(sty_content) $(STY_DIR) 73 | 74 | version: 75 | echo $(VERSION), $(DATE) 76 | 77 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | LuaXML is a pure Lua library for reading and serializing XML files. The current release is aimed mainly at providing support 5 | for the odsfile package. The documentation was created by automatic conversion of the original documentation in the source code. 6 | In this version, some files that are not useful for LuaTeX were dropped. 7 | 8 | 9 | Install 10 | ======= 11 | 12 | LuaXML is installed in TeX distributions, so you don't need to install it yourself. If you want to try the development version, 13 | then clone this repository and run 14 | 15 | make install 16 | 17 | Please note that you will need the [LDoc](https://stevedonovan.github.io/ldoc/manual/doc.md.html#Processing_Single_Modules) and 18 | [dkjson](http://dkolf.de/dkjson-lua/) Lua modules installed on your system. 
You can install them using: 19 | 20 | $ luarocks install --local ldoc 21 | $ luarocks install --local dkjson 22 | 23 | 24 | License: 25 | ======== 26 | 27 | This code is freely distributable under the terms of the Lua license 28 | (http://www.lua.org/copyright.html) 29 | 30 | 31 | Author 32 | ------ 33 | Michal Hoftich 34 | Email: michal.h21@gmail.com 35 | Version: {{VERSION}}, {{DATE}} 36 | 37 | Original authors: Paul Chakravarti and Manoel Campos (https://about.me/manoelcampos) 38 | 39 | If you are interested in the development process, you can follow it at 40 | 41 | https://github.com/michal-h21/LuaXML 42 | 43 | -------------------------------------------------------------------------------- /config.ld: -------------------------------------------------------------------------------- 1 | file = {"luaxml-domobject.lua", "luaxml-cssquery.lua"} 2 | -------------------------------------------------------------------------------- /css-parser.lua: -------------------------------------------------------------------------------- 1 | local csslexer = require('lexer').load("css") 2 | local CssParser = {} 3 | CssParser.__index = CssParser 4 | 5 | function CssParser.new() 6 | local self = setmetatable({}, CssParser) 7 | -- tokens from each processed source are saved in subtable 8 | self.tokens = {} 9 | -- source counter 10 | self.current_source = 0 11 | return self 12 | end 13 | 14 | function CssParser.tokenize(self, src) 15 | self.current_source = self.current_source + 1 16 | local tokens = csslexer:lex(src) 17 | local start = 1 18 | for i = 1, #tokens, 2 do 19 | local token_type, len = tokens[i], tokens[i+1] 20 | local contents = src:sub(start, len-1) 21 | self:add_token(token_type, contents) 22 | -- print(t,len, src:sub(start, len-1)) 23 | start = len 24 | end 25 | end 26 | 27 | function CssParser.add_token(self,token_type, contents) 28 | -- add token for the current source 29 | local current = self.current_source or 0 30 | local tokens = self.tokens[current] or {} 31 | 
table.insert(tokens, {type = token_type, contents = contents}) 32 | self.tokens[current] = tokens 33 | end 34 | 35 | 36 | 37 | 38 | 39 | local src = [[ 40 | 51 | ]] 52 | 53 | local parser = CssParser.new() 54 | parser:tokenize(src) 55 | 56 | for i = 0, parser.current_source do 57 | local current = parser.tokens[i] or {} 58 | for k, v in ipairs(current) do 59 | print(k, v.type, v.contents) 60 | end 61 | end 62 | 63 | -------------------------------------------------------------------------------- /data/jsontolua.lua: -------------------------------------------------------------------------------- 1 | -- convert json file with html named entities to Lua table 2 | -- json source: https://html.spec.whatwg.org/entities.json 3 | local json = require "dkjson" 4 | local data = io.read("*all") 5 | 6 | local function sorted(json_data) 7 | -- we need to sort entity names alphabetically to get good order each time we run this script (for git) 8 | local t = {} 9 | for k in pairs(json_data) do 10 | table.insert(t, k) 11 | end 12 | table.sort(t) 13 | return t 14 | end 15 | 16 | local json_data = json.decode(data) 17 | print("return {") 18 | for _, name in ipairs(sorted(json_data)) do 19 | local rec = json_data[name] 20 | print(string.format('["%s"]="%s",',name:gsub("[&;]", ""), rec.characters:gsub('\\', '\\\\'):gsub("\n", '\\n'):gsub('"', '\\"'))) 21 | end 22 | 23 | print "}" 24 | -------------------------------------------------------------------------------- /dom-sample.lua: -------------------------------------------------------------------------------- 1 | --kpse.set_program_name("luatex") 2 | function traverseDom(current,level) 3 | local level = level or 0 4 | local spaces = string.rep(" ",level) 5 | local root= current or current.root 6 | local name = root._name or "unnamed" 7 | local xtype = root._type or "untyped" 8 | local attributes = root._attr or {} 9 | if xtype == "TEXT" then 10 | print(spaces .."TEXT : " .. root._text) 11 | else 12 | print(spaces .. xtype .. 
" : " .. name) 13 | end 14 | for k, v in pairs(attributes) do 15 | print(spaces .. " ".. k.."="..v) 16 | end 17 | local children = root._children or {} 18 | for _, child in ipairs(children) do 19 | traverseDom(child, level + 1) 20 | end 21 | end 22 | 23 | local xml = require('luaxml-mod-xml') 24 | local handler = require('luaxml-mod-handler') 25 | local x = '

hello world, how are you?

' 26 | local domHandler = handler.domHandler() 27 | local parser = xml.xmlParser(domHandler) 28 | parser:parse(x) 29 | traverseDom(domHandler.root) 30 | -------------------------------------------------------------------------------- /encodings/index-ibm866.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-ibm866.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: db6fe14a559d1601a7667338d83704773d5708dbc641e1ad3c5e21405770f05e 5 | # Date: 2018-01-06 6 | 7 | 0 0x0410 А (CYRILLIC CAPITAL LETTER A) 8 | 1 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 9 | 2 0x0412 В (CYRILLIC CAPITAL LETTER VE) 10 | 3 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) 11 | 4 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 12 | 5 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 13 | 6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 14 | 7 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 15 | 8 0x0418 И (CYRILLIC CAPITAL LETTER I) 16 | 9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 17 | 10 0x041A К (CYRILLIC CAPITAL LETTER KA) 18 | 11 0x041B Л (CYRILLIC CAPITAL LETTER EL) 19 | 12 0x041C М (CYRILLIC CAPITAL LETTER EM) 20 | 13 0x041D Н (CYRILLIC CAPITAL LETTER EN) 21 | 14 0x041E О (CYRILLIC CAPITAL LETTER O) 22 | 15 0x041F П (CYRILLIC CAPITAL LETTER PE) 23 | 16 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 24 | 17 0x0421 С (CYRILLIC CAPITAL LETTER ES) 25 | 18 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 26 | 19 0x0423 У (CYRILLIC CAPITAL LETTER U) 27 | 20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 28 | 21 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 29 | 22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 30 | 23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 31 | 24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 32 | 25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 33 | 26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 34 | 27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 35 | 28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 36 | 29 0x042D Э (CYRILLIC CAPITAL LETTER E) 37 | 30 0x042E Ю (CYRILLIC CAPITAL 
LETTER YU) 38 | 31 0x042F Я (CYRILLIC CAPITAL LETTER YA) 39 | 32 0x0430 а (CYRILLIC SMALL LETTER A) 40 | 33 0x0431 б (CYRILLIC SMALL LETTER BE) 41 | 34 0x0432 в (CYRILLIC SMALL LETTER VE) 42 | 35 0x0433 г (CYRILLIC SMALL LETTER GHE) 43 | 36 0x0434 д (CYRILLIC SMALL LETTER DE) 44 | 37 0x0435 е (CYRILLIC SMALL LETTER IE) 45 | 38 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 46 | 39 0x0437 з (CYRILLIC SMALL LETTER ZE) 47 | 40 0x0438 и (CYRILLIC SMALL LETTER I) 48 | 41 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 49 | 42 0x043A к (CYRILLIC SMALL LETTER KA) 50 | 43 0x043B л (CYRILLIC SMALL LETTER EL) 51 | 44 0x043C м (CYRILLIC SMALL LETTER EM) 52 | 45 0x043D н (CYRILLIC SMALL LETTER EN) 53 | 46 0x043E о (CYRILLIC SMALL LETTER O) 54 | 47 0x043F п (CYRILLIC SMALL LETTER PE) 55 | 48 0x2591 ░ (LIGHT SHADE) 56 | 49 0x2592 ▒ (MEDIUM SHADE) 57 | 50 0x2593 ▓ (DARK SHADE) 58 | 51 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) 59 | 52 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT) 60 | 53 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) 61 | 54 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE) 62 | 55 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE) 63 | 56 0x2555 ╕ (BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE) 64 | 57 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) 65 | 58 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) 66 | 59 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) 67 | 60 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT) 68 | 61 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE) 69 | 62 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) 70 | 63 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) 71 | 64 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) 72 | 65 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) 73 | 66 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) 74 | 67 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) 75 | 68 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) 76 | 69 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) 77 | 70 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) 78 | 71 0x255F 
╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) 79 | 72 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) 80 | 73 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) 81 | 74 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) 82 | 75 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) 83 | 76 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) 84 | 77 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) 85 | 78 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL) 86 | 79 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) 87 | 80 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) 88 | 81 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE) 89 | 82 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE) 90 | 83 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) 91 | 84 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) 92 | 85 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE) 93 | 86 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE) 94 | 87 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE) 95 | 88 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE) 96 | 89 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT) 97 | 90 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) 98 | 91 0x2588 █ (FULL BLOCK) 99 | 92 0x2584 ▄ (LOWER HALF BLOCK) 100 | 93 0x258C ▌ (LEFT HALF BLOCK) 101 | 94 0x2590 ▐ (RIGHT HALF BLOCK) 102 | 95 0x2580 ▀ (UPPER HALF BLOCK) 103 | 96 0x0440 р (CYRILLIC SMALL LETTER ER) 104 | 97 0x0441 с (CYRILLIC SMALL LETTER ES) 105 | 98 0x0442 т (CYRILLIC SMALL LETTER TE) 106 | 99 0x0443 у (CYRILLIC SMALL LETTER U) 107 | 100 0x0444 ф (CYRILLIC SMALL LETTER EF) 108 | 101 0x0445 х (CYRILLIC SMALL LETTER HA) 109 | 102 0x0446 ц (CYRILLIC SMALL LETTER TSE) 110 | 103 0x0447 ч (CYRILLIC SMALL LETTER CHE) 111 | 104 0x0448 ш (CYRILLIC SMALL LETTER SHA) 112 | 105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 113 | 106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 114 | 107 0x044B ы (CYRILLIC SMALL LETTER YERU) 115 | 108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 116 | 109 0x044D э 
(CYRILLIC SMALL LETTER E) 117 | 110 0x044E ю (CYRILLIC SMALL LETTER YU) 118 | 111 0x044F я (CYRILLIC SMALL LETTER YA) 119 | 112 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 120 | 113 0x0451 ё (CYRILLIC SMALL LETTER IO) 121 | 114 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) 122 | 115 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) 123 | 116 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) 124 | 117 0x0457 ї (CYRILLIC SMALL LETTER YI) 125 | 118 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) 126 | 119 0x045E ў (CYRILLIC SMALL LETTER SHORT U) 127 | 120 0x00B0 ° (DEGREE SIGN) 128 | 121 0x2219 ∙ (BULLET OPERATOR) 129 | 122 0x00B7 · (MIDDLE DOT) 130 | 123 0x221A √ (SQUARE ROOT) 131 | 124 0x2116 № (NUMERO SIGN) 132 | 125 0x00A4 ¤ (CURRENCY SIGN) 133 | 126 0x25A0 ■ (BLACK SQUARE) 134 | 127 0x00A0   (NO-BREAK SPACE) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-10.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-10.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 02c2b5590d8ccda9931008c471f6ee2c590b2c8fe5e6ccb3b08638115d778507 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 41 | 34 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) 
42 | 35 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) 43 | 36 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) 44 | 37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE) 45 | 38 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) 48 | 41 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) 49 | 42 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 50 | 43 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE) 51 | 44 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) 54 | 47 0x014A Ŋ (LATIN CAPITAL LETTER ENG) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 57 | 50 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) 58 | 51 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) 59 | 52 0x012B ī (LATIN SMALL LETTER I WITH MACRON) 60 | 53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE) 61 | 54 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) 64 | 57 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 65 | 58 0x0161 š (LATIN SMALL LETTER S WITH CARON) 66 | 59 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE) 67 | 60 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 68 | 61 0x2015 ― (HORIZONTAL BAR) 69 | 62 0x016B ū (LATIN SMALL LETTER U WITH MACRON) 70 | 63 0x014B ŋ (LATIN SMALL LETTER ENG) 71 | 64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) 79 | 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 82 
| 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) 88 | 81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) 89 | 82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x012F į (LATIN SMALL LETTER I WITH OGONEK) 111 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 
119 | 112 0x00F0 ð (LATIN SMALL LETTER ETH) 120 | 113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) 121 | 114 0x014D ō (LATIN SMALL LETTER O WITH MACRON) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x00FE þ (LATIN SMALL LETTER THORN) 134 | 127 0x0138 ĸ (LATIN SMALL LETTER KRA) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-13.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-13.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 40736338e964ab520407cebcb01329f8d450abf6ce12bf88b74b655b60e43300 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 41 | 34 
0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00C6 Æ (LATIN CAPITAL LETTER AE) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x201C “ (LEFT DOUBLE QUOTATION MARK) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x00E6 æ (LATIN SMALL LETTER AE) 71 | 64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 72 | 65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) 73 | 66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) 74 | 67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 78 | 71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) 79 | 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) 82 | 75 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) 83 | 76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) 84 | 77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) 85 | 78 0x012A Ī (LATIN CAPITAL LETTER I 
WITH MACRON) 86 | 79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) 87 | 80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 88 | 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) 89 | 82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) 96 | 89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) 97 | 90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) 98 | 91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 101 | 94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 104 | 97 0x012F į (LATIN SMALL LETTER I WITH OGONEK) 105 | 98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) 106 | 99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 110 | 103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) 111 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) 114 | 107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) 115 | 108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) 116 | 109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) 117 | 110 0x012B ī (LATIN SMALL LETTER I WITH MACRON) 118 | 111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) 119 | 112 0x0161 š (LATIN SMALL LETTER S WITH CARON) 120 | 113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) 121 | 114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 
123 | 116 0x014D ō (LATIN SMALL LETTER O WITH MACRON) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) 128 | 121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) 129 | 122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) 130 | 123 0x016B ū (LATIN SMALL LETTER U WITH MACRON) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 133 | 126 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 134 | 127 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-14.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-14.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 2c8651cfc08b1f35b17919ee5379f2fa006af3ec809f11b3b7f470785580542b 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x1E02 Ḃ (LATIN CAPITAL LETTER B WITH DOT ABOVE) 41 | 34 0x1E03 ḃ (LATIN SMALL LETTER B WITH DOT ABOVE) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE) 44 | 37 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE) 45 | 38 0x1E0A Ḋ 
(LATIN CAPITAL LETTER D WITH DOT ABOVE) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x1E80 Ẁ (LATIN CAPITAL LETTER W WITH GRAVE) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x1E82 Ẃ (LATIN CAPITAL LETTER W WITH ACUTE) 50 | 43 0x1E0B ḋ (LATIN SMALL LETTER D WITH DOT ABOVE) 51 | 44 0x1EF2 Ỳ (LATIN CAPITAL LETTER Y WITH GRAVE) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 55 | 48 0x1E1E Ḟ (LATIN CAPITAL LETTER F WITH DOT ABOVE) 56 | 49 0x1E1F ḟ (LATIN SMALL LETTER F WITH DOT ABOVE) 57 | 50 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE) 58 | 51 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE) 59 | 52 0x1E40 Ṁ (LATIN CAPITAL LETTER M WITH DOT ABOVE) 60 | 53 0x1E41 ṁ (LATIN SMALL LETTER M WITH DOT ABOVE) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x1E56 Ṗ (LATIN CAPITAL LETTER P WITH DOT ABOVE) 63 | 56 0x1E81 ẁ (LATIN SMALL LETTER W WITH GRAVE) 64 | 57 0x1E57 ṗ (LATIN SMALL LETTER P WITH DOT ABOVE) 65 | 58 0x1E83 ẃ (LATIN SMALL LETTER W WITH ACUTE) 66 | 59 0x1E60 Ṡ (LATIN CAPITAL LETTER S WITH DOT ABOVE) 67 | 60 0x1EF3 ỳ (LATIN SMALL LETTER Y WITH GRAVE) 68 | 61 0x1E84 Ẅ (LATIN CAPITAL LETTER W WITH DIAERESIS) 69 | 62 0x1E85 ẅ (LATIN SMALL LETTER W WITH DIAERESIS) 70 | 63 0x1E61 ṡ (LATIN SMALL LETTER S WITH DOT ABOVE) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH 
GRAVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x0174 Ŵ (LATIN CAPITAL LETTER W WITH CIRCUMFLEX) 88 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 89 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x1E6A Ṫ (LATIN CAPITAL LETTER T WITH DOT ABOVE) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x0176 Ŷ (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x0175 ŵ (LATIN SMALL LETTER W WITH CIRCUMFLEX) 120 | 113 0x00F1 ñ 
(LATIN SMALL LETTER N WITH TILDE) 121 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x1E6B ṫ (LATIN SMALL LETTER T WITH DOT ABOVE) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x0177 ŷ (LATIN SMALL LETTER Y WITH CIRCUMFLEX) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-15.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-15.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: a560aba47bccd7510a6ac77f671fe75dca3800f05cf6d676910c311a8f8ff079 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND 
SIGN) 43 | 36 0x20AC € (EURO SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x0161 š (LATIN SMALL LETTER S WITH CARON) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x00AA ª (FEMININE ORDINAL INDICATOR) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x00BA º (MASCULINE ORDINAL INDICATOR) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 68 | 61 0x0153 œ (LATIN SMALL LIGATURE OE) 69 | 62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 70 | 63 0x00BF ¿ (INVERTED QUESTION MARK) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 
| 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) 88 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 89 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x00F0 ð (LATIN SMALL LETTER ETH) 120 | 113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 121 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O 
WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x00FE þ (LATIN SMALL LETTER THORN) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-16.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-16.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 55676320d2d1b6e6909f5b3d741a7cf0cefc84e920aa4474afc091459111c2e3 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 41 | 34 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 42 | 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) 43 | 36 0x20AC € (EURO SIGN) 44 | 37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 45 | 38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x0161 š (LATIN SMALL LETTER S WITH 
CARON) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x0218 Ș (LATIN CAPITAL LETTER S WITH COMMA BELOW) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) 54 | 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 58 | 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) 59 | 52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 60 | 53 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 64 | 57 0x010D č (LATIN SMALL LETTER C WITH CARON) 65 | 58 0x0219 ș (LATIN SMALL LETTER S WITH COMMA BELOW) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 68 | 61 0x0153 œ (LATIN SMALL LIGATURE OE) 69 | 62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 70 | 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x0110 Đ (LATIN 
CAPITAL LETTER D WITH STROKE) 88 | 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) 89 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) 95 | 88 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) 96 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 101 | 94 0x021A Ț (LATIN CAPITAL LETTER T WITH COMMA BELOW) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 120 | 113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) 121 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 
117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) 127 | 120 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 133 | 126 0x021B ț (LATIN SMALL LETTER T WITH COMMA BELOW) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-2.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-2.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 9569c67f22d0b57790e1c407c6eecf227e4562322dc296de43cdab7a0152ec73 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 41 | 34 0x02D8 ˘ (BREVE) 42 | 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON) 45 | 38 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) 46 | 39 0x00A7 § 
(SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 49 | 42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) 50 | 43 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON) 51 | 44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 54 | 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 57 | 50 0x02DB ˛ (OGONEK) 58 | 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x013E ľ (LATIN SMALL LETTER L WITH CARON) 61 | 54 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) 62 | 55 0x02C7 ˇ (CARON) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x0161 š (LATIN SMALL LETTER S WITH CARON) 65 | 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) 66 | 59 0x0165 ť (LATIN SMALL LETTER T WITH CARON) 67 | 60 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) 68 | 61 0x02DD ˝ (DOUBLE ACUTE ACCENT) 69 | 62 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 70 | 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 71 | 64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE) 77 | 70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON) 87 | 80 0x0110 Đ (LATIN CAPITAL 
LETTER D WITH STROKE) 88 | 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) 89 | 82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON) 96 | 89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE) 109 | 102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x011B ě (LATIN SMALL LETTER E WITH CARON) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x010F ď (LATIN SMALL LETTER D WITH CARON) 119 | 112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 120 | 113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) 121 | 114 0x0148 ň (LATIN SMALL LETTER N WITH CARON) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x0151 ő (LATIN 
SMALL LETTER O WITH DOUBLE ACUTE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x0159 ř (LATIN SMALL LETTER R WITH CARON) 128 | 121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA) 134 | 127 0x02D9 ˙ (DOT ABOVE) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-3.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-3.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: af8f1e12df79b768322b5e83613698cdc619438270a2fc359554331c805054a3 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0126 Ħ (LATIN CAPITAL LETTER H WITH STROKE) 41 | 34 0x02D8 ˘ (BREVE) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 38 0x0124 Ĥ (LATIN CAPITAL LETTER H WITH CIRCUMFLEX) 45 | 39 0x00A7 § (SECTION SIGN) 46 | 40 0x00A8 ¨ (DIAERESIS) 47 | 41 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE) 48 | 42 0x015E Ş (LATIN CAPITAL LETTER S 
WITH CEDILLA) 49 | 43 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE) 50 | 44 0x0134 Ĵ (LATIN CAPITAL LETTER J WITH CIRCUMFLEX) 51 | 45 0x00AD ­ (SOFT HYPHEN) 52 | 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 53 | 48 0x00B0 ° (DEGREE SIGN) 54 | 49 0x0127 ħ (LATIN SMALL LETTER H WITH STROKE) 55 | 50 0x00B2 ² (SUPERSCRIPT TWO) 56 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 57 | 52 0x00B4 ´ (ACUTE ACCENT) 58 | 53 0x00B5 µ (MICRO SIGN) 59 | 54 0x0125 ĥ (LATIN SMALL LETTER H WITH CIRCUMFLEX) 60 | 55 0x00B7 · (MIDDLE DOT) 61 | 56 0x00B8 ¸ (CEDILLA) 62 | 57 0x0131 ı (LATIN SMALL LETTER DOTLESS I) 63 | 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) 64 | 59 0x011F ğ (LATIN SMALL LETTER G WITH BREVE) 65 | 60 0x0135 ĵ (LATIN SMALL LETTER J WITH CIRCUMFLEX) 66 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 67 | 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 68 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 69 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 70 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 71 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 72 | 69 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE) 73 | 70 0x0108 Ĉ (LATIN CAPITAL LETTER C WITH CIRCUMFLEX) 74 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 75 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 76 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 77 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 78 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 79 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 80 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 81 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 82 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 83 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 84 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 85 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 86 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 87 | 85 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE) 88 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O 
WITH DIAERESIS) 89 | 87 0x00D7 × (MULTIPLICATION SIGN) 90 | 88 0x011C Ĝ (LATIN CAPITAL LETTER G WITH CIRCUMFLEX) 91 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 92 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 93 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 94 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 95 | 93 0x016C Ŭ (LATIN CAPITAL LETTER U WITH BREVE) 96 | 94 0x015C Ŝ (LATIN CAPITAL LETTER S WITH CIRCUMFLEX) 97 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 98 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 99 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 100 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 101 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 102 | 101 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE) 103 | 102 0x0109 ĉ (LATIN SMALL LETTER C WITH CIRCUMFLEX) 104 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 105 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 106 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 107 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 108 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 109 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 110 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 111 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 112 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 113 | 113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 114 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 115 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 116 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 117 | 117 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE) 118 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 119 | 119 0x00F7 ÷ (DIVISION SIGN) 120 | 120 0x011D ĝ (LATIN SMALL LETTER G WITH CIRCUMFLEX) 121 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 122 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 123 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 124 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 125 | 125 0x016D ŭ (LATIN SMALL 
LETTER U WITH BREVE) 126 | 126 0x015D ŝ (LATIN SMALL LETTER S WITH CIRCUMFLEX) 127 | 127 0x02D9 ˙ (DOT ABOVE) 128 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-4.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-4.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 72f29c92344d351fe9e74a946e7e0468d76d542c6894ff82982cb652ebe0feb7 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 41 | 34 0x0138 ĸ (LATIN SMALL LETTER KRA) 42 | 35 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE) 45 | 38 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 49 | 42 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) 50 | 43 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) 51 | 44 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 57 | 50 0x02DB ˛ (OGONEK) 58 | 51 
0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE) 61 | 54 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) 62 | 55 0x02C7 ˇ (CARON) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x0161 š (LATIN SMALL LETTER S WITH CARON) 65 | 58 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) 66 | 59 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) 67 | 60 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE) 68 | 61 0x014A Ŋ (LATIN CAPITAL LETTER ENG) 69 | 62 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 70 | 63 0x014B ŋ (LATIN SMALL LETTER ENG) 71 | 64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) 79 | 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) 87 | 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) 88 | 81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) 89 | 82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) 90 | 83 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) 97 | 90 0x00DA Ú 
(LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE) 101 | 94 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x012F į (LATIN SMALL LETTER I WITH OGONEK) 111 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x012B ī (LATIN SMALL LETTER I WITH MACRON) 119 | 112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 120 | 113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) 121 | 114 0x014D ō (LATIN SMALL LETTER O WITH MACRON) 122 | 115 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE) 133 | 126 0x016B ū (LATIN SMALL LETTER U WITH MACRON) 134 | 127 0x02D9 ˙ (DOT 
ABOVE) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-5.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-5.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: fa9b1f3f5242df43e2e7bca80e9b6997c67944f20a4af91ee06bacc4e132d9c9 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 41 | 34 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) 42 | 35 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) 43 | 36 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) 44 | 37 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) 45 | 38 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) 46 | 39 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) 47 | 40 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) 48 | 41 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) 49 | 42 0x040A Њ (CYRILLIC CAPITAL LETTER NJE) 50 | 43 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) 51 | 44 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) 54 | 47 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) 55 | 48 0x0410 А (CYRILLIC CAPITAL LETTER A) 56 | 49 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 57 | 50 0x0412 В (CYRILLIC CAPITAL LETTER VE) 58 | 51 0x0413 Г (CYRILLIC CAPITAL LETTER 
GHE) 59 | 52 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 60 | 53 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 61 | 54 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 62 | 55 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 63 | 56 0x0418 И (CYRILLIC CAPITAL LETTER I) 64 | 57 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 65 | 58 0x041A К (CYRILLIC CAPITAL LETTER KA) 66 | 59 0x041B Л (CYRILLIC CAPITAL LETTER EL) 67 | 60 0x041C М (CYRILLIC CAPITAL LETTER EM) 68 | 61 0x041D Н (CYRILLIC CAPITAL LETTER EN) 69 | 62 0x041E О (CYRILLIC CAPITAL LETTER O) 70 | 63 0x041F П (CYRILLIC CAPITAL LETTER PE) 71 | 64 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 72 | 65 0x0421 С (CYRILLIC CAPITAL LETTER ES) 73 | 66 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 74 | 67 0x0423 У (CYRILLIC CAPITAL LETTER U) 75 | 68 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 76 | 69 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 77 | 70 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 78 | 71 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 79 | 72 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 80 | 73 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 81 | 74 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 82 | 75 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 83 | 76 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 84 | 77 0x042D Э (CYRILLIC CAPITAL LETTER E) 85 | 78 0x042E Ю (CYRILLIC CAPITAL LETTER YU) 86 | 79 0x042F Я (CYRILLIC CAPITAL LETTER YA) 87 | 80 0x0430 а (CYRILLIC SMALL LETTER A) 88 | 81 0x0431 б (CYRILLIC SMALL LETTER BE) 89 | 82 0x0432 в (CYRILLIC SMALL LETTER VE) 90 | 83 0x0433 г (CYRILLIC SMALL LETTER GHE) 91 | 84 0x0434 д (CYRILLIC SMALL LETTER DE) 92 | 85 0x0435 е (CYRILLIC SMALL LETTER IE) 93 | 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 94 | 87 0x0437 з (CYRILLIC SMALL LETTER ZE) 95 | 88 0x0438 и (CYRILLIC SMALL LETTER I) 96 | 89 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 97 | 90 0x043A к (CYRILLIC SMALL LETTER KA) 98 | 91 0x043B л (CYRILLIC SMALL LETTER EL) 99 | 92 0x043C м (CYRILLIC SMALL LETTER EM) 100 | 93 0x043D н (CYRILLIC SMALL LETTER EN) 101 | 94 0x043E о (CYRILLIC SMALL LETTER O) 102 | 95 0x043F 
п (CYRILLIC SMALL LETTER PE) 103 | 96 0x0440 р (CYRILLIC SMALL LETTER ER) 104 | 97 0x0441 с (CYRILLIC SMALL LETTER ES) 105 | 98 0x0442 т (CYRILLIC SMALL LETTER TE) 106 | 99 0x0443 у (CYRILLIC SMALL LETTER U) 107 | 100 0x0444 ф (CYRILLIC SMALL LETTER EF) 108 | 101 0x0445 х (CYRILLIC SMALL LETTER HA) 109 | 102 0x0446 ц (CYRILLIC SMALL LETTER TSE) 110 | 103 0x0447 ч (CYRILLIC SMALL LETTER CHE) 111 | 104 0x0448 ш (CYRILLIC SMALL LETTER SHA) 112 | 105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 113 | 106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 114 | 107 0x044B ы (CYRILLIC SMALL LETTER YERU) 115 | 108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 116 | 109 0x044D э (CYRILLIC SMALL LETTER E) 117 | 110 0x044E ю (CYRILLIC SMALL LETTER YU) 118 | 111 0x044F я (CYRILLIC SMALL LETTER YA) 119 | 112 0x2116 № (NUMERO SIGN) 120 | 113 0x0451 ё (CYRILLIC SMALL LETTER IO) 121 | 114 0x0452 ђ (CYRILLIC SMALL LETTER DJE) 122 | 115 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) 123 | 116 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) 124 | 117 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) 125 | 118 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) 126 | 119 0x0457 ї (CYRILLIC SMALL LETTER YI) 127 | 120 0x0458 ј (CYRILLIC SMALL LETTER JE) 128 | 121 0x0459 љ (CYRILLIC SMALL LETTER LJE) 129 | 122 0x045A њ (CYRILLIC SMALL LETTER NJE) 130 | 123 0x045B ћ (CYRILLIC SMALL LETTER TSHE) 131 | 124 0x045C ќ (CYRILLIC SMALL LETTER KJE) 132 | 125 0x00A7 § (SECTION SIGN) 133 | 126 0x045E ў (CYRILLIC SMALL LETTER SHORT U) 134 | 127 0x045F џ (CYRILLIC SMALL LETTER DZHE) 135 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-6.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-6.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 85bb7b5c2dc75975afebe5743935ba4ed5a09c1e9e34e9bfb2ff80293f5d8bbc 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 
0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 36 0x00A4 ¤ (CURRENCY SIGN) 41 | 44 0x060C ، (ARABIC COMMA) 42 | 45 0x00AD ­ (SOFT HYPHEN) 43 | 59 0x061B ؛ (ARABIC SEMICOLON) 44 | 63 0x061F ؟ (ARABIC QUESTION MARK) 45 | 65 0x0621 ء (ARABIC LETTER HAMZA) 46 | 66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE) 47 | 67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE) 48 | 68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE) 49 | 69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW) 50 | 70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE) 51 | 71 0x0627 ا (ARABIC LETTER ALEF) 52 | 72 0x0628 ب (ARABIC LETTER BEH) 53 | 73 0x0629 ة (ARABIC LETTER TEH MARBUTA) 54 | 74 0x062A ت (ARABIC LETTER TEH) 55 | 75 0x062B ث (ARABIC LETTER THEH) 56 | 76 0x062C ج (ARABIC LETTER JEEM) 57 | 77 0x062D ح (ARABIC LETTER HAH) 58 | 78 0x062E خ (ARABIC LETTER KHAH) 59 | 79 0x062F د (ARABIC LETTER DAL) 60 | 80 0x0630 ذ (ARABIC LETTER THAL) 61 | 81 0x0631 ر (ARABIC LETTER REH) 62 | 82 0x0632 ز (ARABIC LETTER ZAIN) 63 | 83 0x0633 س (ARABIC LETTER SEEN) 64 | 84 0x0634 ش (ARABIC LETTER SHEEN) 65 | 85 0x0635 ص (ARABIC LETTER SAD) 66 | 86 0x0636 ض (ARABIC LETTER DAD) 67 | 87 0x0637 ط (ARABIC LETTER TAH) 68 | 88 0x0638 ظ (ARABIC LETTER ZAH) 69 | 89 0x0639 ع (ARABIC LETTER AIN) 70 | 90 0x063A غ (ARABIC LETTER GHAIN) 71 | 96 0x0640 ـ (ARABIC TATWEEL) 72 | 97 0x0641 ف (ARABIC LETTER FEH) 73 | 98 0x0642 ق (ARABIC LETTER QAF) 74 | 99 
0x0643 ك (ARABIC LETTER KAF) 75 | 100 0x0644 ل (ARABIC LETTER LAM) 76 | 101 0x0645 م (ARABIC LETTER MEEM) 77 | 102 0x0646 ن (ARABIC LETTER NOON) 78 | 103 0x0647 ه (ARABIC LETTER HEH) 79 | 104 0x0648 و (ARABIC LETTER WAW) 80 | 105 0x0649 ى (ARABIC LETTER ALEF MAKSURA) 81 | 106 0x064A ي (ARABIC LETTER YEH) 82 | 107 0x064B ً (ARABIC FATHATAN) 83 | 108 0x064C ٌ (ARABIC DAMMATAN) 84 | 109 0x064D ٍ (ARABIC KASRATAN) 85 | 110 0x064E َ (ARABIC FATHA) 86 | 111 0x064F ُ (ARABIC DAMMA) 87 | 112 0x0650 ِ (ARABIC KASRA) 88 | 113 0x0651 ّ (ARABIC SHADDA) 89 | 114 0x0652 ْ (ARABIC SUKUN) 90 | -------------------------------------------------------------------------------- /encodings/index-iso-8859-7.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-7.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: f53d8aeba36314ef950eef02ffcf11dff540638ce27dfe7a86b6ccc6875afb24 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 41 | 34 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x20AC € (EURO SIGN) 44 | 37 0x20AF ₯ (DRACHMA SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x037A ͺ 
(GREEK YPOGEGRAMMENI) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 47 0x2015 ― (HORIZONTAL BAR) 54 | 48 0x00B0 ° (DEGREE SIGN) 55 | 49 0x00B1 ± (PLUS-MINUS SIGN) 56 | 50 0x00B2 ² (SUPERSCRIPT TWO) 57 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 58 | 52 0x0384 ΄ (GREEK TONOS) 59 | 53 0x0385 ΅ (GREEK DIALYTIKA TONOS) 60 | 54 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS) 61 | 55 0x00B7 · (MIDDLE DOT) 62 | 56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS) 63 | 57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS) 64 | 58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS) 65 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 66 | 60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS) 67 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 68 | 62 0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS) 69 | 63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS) 70 | 64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS) 71 | 65 0x0391 Α (GREEK CAPITAL LETTER ALPHA) 72 | 66 0x0392 Β (GREEK CAPITAL LETTER BETA) 73 | 67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA) 74 | 68 0x0394 Δ (GREEK CAPITAL LETTER DELTA) 75 | 69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON) 76 | 70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA) 77 | 71 0x0397 Η (GREEK CAPITAL LETTER ETA) 78 | 72 0x0398 Θ (GREEK CAPITAL LETTER THETA) 79 | 73 0x0399 Ι (GREEK CAPITAL LETTER IOTA) 80 | 74 0x039A Κ (GREEK CAPITAL LETTER KAPPA) 81 | 75 0x039B Λ (GREEK CAPITAL LETTER LAMDA) 82 | 76 0x039C Μ (GREEK CAPITAL LETTER MU) 83 | 77 0x039D Ν (GREEK CAPITAL LETTER NU) 84 | 78 0x039E Ξ (GREEK CAPITAL LETTER XI) 85 | 79 0x039F Ο (GREEK CAPITAL LETTER OMICRON) 86 | 80 0x03A0 Π (GREEK CAPITAL LETTER PI) 87 | 81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO) 88 | 83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA) 89 | 84 0x03A4 Τ (GREEK CAPITAL LETTER TAU) 90 | 85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON) 91 | 86 0x03A6 Φ (GREEK CAPITAL LETTER PHI) 92 | 87 0x03A7 Χ (GREEK CAPITAL LETTER CHI) 93 | 88 0x03A8 
Ψ (GREEK CAPITAL LETTER PSI) 94 | 89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) 95 | 90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA) 96 | 91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA) 97 | 92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS) 98 | 93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS) 99 | 94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS) 100 | 95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS) 101 | 96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS) 102 | 97 0x03B1 α (GREEK SMALL LETTER ALPHA) 103 | 98 0x03B2 β (GREEK SMALL LETTER BETA) 104 | 99 0x03B3 γ (GREEK SMALL LETTER GAMMA) 105 | 100 0x03B4 δ (GREEK SMALL LETTER DELTA) 106 | 101 0x03B5 ε (GREEK SMALL LETTER EPSILON) 107 | 102 0x03B6 ζ (GREEK SMALL LETTER ZETA) 108 | 103 0x03B7 η (GREEK SMALL LETTER ETA) 109 | 104 0x03B8 θ (GREEK SMALL LETTER THETA) 110 | 105 0x03B9 ι (GREEK SMALL LETTER IOTA) 111 | 106 0x03BA κ (GREEK SMALL LETTER KAPPA) 112 | 107 0x03BB λ (GREEK SMALL LETTER LAMDA) 113 | 108 0x03BC μ (GREEK SMALL LETTER MU) 114 | 109 0x03BD ν (GREEK SMALL LETTER NU) 115 | 110 0x03BE ξ (GREEK SMALL LETTER XI) 116 | 111 0x03BF ο (GREEK SMALL LETTER OMICRON) 117 | 112 0x03C0 π (GREEK SMALL LETTER PI) 118 | 113 0x03C1 ρ (GREEK SMALL LETTER RHO) 119 | 114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA) 120 | 115 0x03C3 σ (GREEK SMALL LETTER SIGMA) 121 | 116 0x03C4 τ (GREEK SMALL LETTER TAU) 122 | 117 0x03C5 υ (GREEK SMALL LETTER UPSILON) 123 | 118 0x03C6 φ (GREEK SMALL LETTER PHI) 124 | 119 0x03C7 χ (GREEK SMALL LETTER CHI) 125 | 120 0x03C8 ψ (GREEK SMALL LETTER PSI) 126 | 121 0x03C9 ω (GREEK SMALL LETTER OMEGA) 127 | 122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA) 128 | 123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA) 129 | 124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS) 130 | 125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS) 131 | 126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS) 132 | 
-------------------------------------------------------------------------------- /encodings/index-iso-8859-8.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-iso-8859-8.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 7657a9ca3fa875990da960d3f812eea28dcd0ae6ed55a18d5394303c86f5484b 5 | # Date: 2018-01-06 6 | 7 | 0 0x0080 € () 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x0085 … () 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x0091 ‘ () 25 | 18 0x0092 ’ () 26 | 19 0x0093 “ () 27 | 20 0x0094 ” () 28 | 21 0x0095 • () 29 | 22 0x0096 – () 30 | 23 0x0097 — () 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 34 0x00A2 ¢ (CENT SIGN) 41 | 35 0x00A3 £ (POUND SIGN) 42 | 36 0x00A4 ¤ (CURRENCY SIGN) 43 | 37 0x00A5 ¥ (YEN SIGN) 44 | 38 0x00A6 ¦ (BROKEN BAR) 45 | 39 0x00A7 § (SECTION SIGN) 46 | 40 0x00A8 ¨ (DIAERESIS) 47 | 41 0x00A9 © (COPYRIGHT SIGN) 48 | 42 0x00D7 × (MULTIPLICATION SIGN) 49 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 50 | 44 0x00AC ¬ (NOT SIGN) 51 | 45 0x00AD ­ (SOFT HYPHEN) 52 | 46 0x00AE ® (REGISTERED SIGN) 53 | 47 0x00AF ¯ (MACRON) 54 | 48 0x00B0 ° (DEGREE SIGN) 55 | 49 0x00B1 ± (PLUS-MINUS SIGN) 56 | 50 0x00B2 ² (SUPERSCRIPT TWO) 57 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 58 | 52 0x00B4 ´ (ACUTE ACCENT) 59 | 53 0x00B5 µ (MICRO SIGN) 60 | 54 0x00B6 ¶ (PILCROW SIGN) 61 | 55 0x00B7 · (MIDDLE DOT) 62 | 56 0x00B8 ¸ (CEDILLA) 63 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 64 | 58 0x00F7 ÷ (DIVISION SIGN) 65 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 66 | 60 0x00BC ¼ (VULGAR 
FRACTION ONE QUARTER) 67 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 68 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 69 | 95 0x2017 ‗ (DOUBLE LOW LINE) 70 | 96 0x05D0 א (HEBREW LETTER ALEF) 71 | 97 0x05D1 ב (HEBREW LETTER BET) 72 | 98 0x05D2 ג (HEBREW LETTER GIMEL) 73 | 99 0x05D3 ד (HEBREW LETTER DALET) 74 | 100 0x05D4 ה (HEBREW LETTER HE) 75 | 101 0x05D5 ו (HEBREW LETTER VAV) 76 | 102 0x05D6 ז (HEBREW LETTER ZAYIN) 77 | 103 0x05D7 ח (HEBREW LETTER HET) 78 | 104 0x05D8 ט (HEBREW LETTER TET) 79 | 105 0x05D9 י (HEBREW LETTER YOD) 80 | 106 0x05DA ך (HEBREW LETTER FINAL KAF) 81 | 107 0x05DB כ (HEBREW LETTER KAF) 82 | 108 0x05DC ל (HEBREW LETTER LAMED) 83 | 109 0x05DD ם (HEBREW LETTER FINAL MEM) 84 | 110 0x05DE מ (HEBREW LETTER MEM) 85 | 111 0x05DF ן (HEBREW LETTER FINAL NUN) 86 | 112 0x05E0 נ (HEBREW LETTER NUN) 87 | 113 0x05E1 ס (HEBREW LETTER SAMEKH) 88 | 114 0x05E2 ע (HEBREW LETTER AYIN) 89 | 115 0x05E3 ף (HEBREW LETTER FINAL PE) 90 | 116 0x05E4 פ (HEBREW LETTER PE) 91 | 117 0x05E5 ץ (HEBREW LETTER FINAL TSADI) 92 | 118 0x05E6 צ (HEBREW LETTER TSADI) 93 | 119 0x05E7 ק (HEBREW LETTER QOF) 94 | 120 0x05E8 ר (HEBREW LETTER RESH) 95 | 121 0x05E9 ש (HEBREW LETTER SHIN) 96 | 122 0x05EA ת (HEBREW LETTER TAV) 97 | 125 0x200E ‎ (LEFT-TO-RIGHT MARK) 98 | 126 0x200F ‏ (RIGHT-TO-LEFT MARK) 99 | -------------------------------------------------------------------------------- /encodings/index-koi8-r.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-koi8-r.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: c5497cd9071cb352c0e56b219154e539badf63de40b71578f09e2e11fe7d50ae 5 | # Date: 2018-01-06 6 | 7 | 0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) 8 | 1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) 9 | 2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) 10 | 3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) 11 | 4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) 12 | 5 0x2518 ┘ (BOX DRAWINGS LIGHT 
UP AND LEFT) 13 | 6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) 14 | 7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT) 15 | 8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) 16 | 9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) 17 | 10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) 18 | 11 0x2580 ▀ (UPPER HALF BLOCK) 19 | 12 0x2584 ▄ (LOWER HALF BLOCK) 20 | 13 0x2588 █ (FULL BLOCK) 21 | 14 0x258C ▌ (LEFT HALF BLOCK) 22 | 15 0x2590 ▐ (RIGHT HALF BLOCK) 23 | 16 0x2591 ░ (LIGHT SHADE) 24 | 17 0x2592 ▒ (MEDIUM SHADE) 25 | 18 0x2593 ▓ (DARK SHADE) 26 | 19 0x2320 ⌠ (TOP HALF INTEGRAL) 27 | 20 0x25A0 ■ (BLACK SQUARE) 28 | 21 0x2219 ∙ (BULLET OPERATOR) 29 | 22 0x221A √ (SQUARE ROOT) 30 | 23 0x2248 ≈ (ALMOST EQUAL TO) 31 | 24 0x2264 ≤ (LESS-THAN OR EQUAL TO) 32 | 25 0x2265 ≥ (GREATER-THAN OR EQUAL TO) 33 | 26 0x00A0   (NO-BREAK SPACE) 34 | 27 0x2321 ⌡ (BOTTOM HALF INTEGRAL) 35 | 28 0x00B0 ° (DEGREE SIGN) 36 | 29 0x00B2 ² (SUPERSCRIPT TWO) 37 | 30 0x00B7 · (MIDDLE DOT) 38 | 31 0x00F7 ÷ (DIVISION SIGN) 39 | 32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) 40 | 33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) 41 | 34 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE) 42 | 35 0x0451 ё (CYRILLIC SMALL LETTER IO) 43 | 36 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE) 44 | 37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) 45 | 38 0x2555 ╕ (BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE) 46 | 39 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE) 47 | 40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) 48 | 41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) 49 | 42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) 50 | 43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) 51 | 44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) 52 | 45 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE) 53 | 46 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT) 54 | 47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) 55 | 48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) 56 | 49 0x2560 ╠ 
(BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) 57 | 50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) 58 | 51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 59 | 52 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE) 60 | 53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) 61 | 54 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE) 62 | 55 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE) 63 | 56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) 64 | 57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) 65 | 58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) 66 | 59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) 67 | 60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE) 68 | 61 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE) 69 | 62 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL) 70 | 63 0x00A9 © (COPYRIGHT SIGN) 71 | 64 0x044E ю (CYRILLIC SMALL LETTER YU) 72 | 65 0x0430 а (CYRILLIC SMALL LETTER A) 73 | 66 0x0431 б (CYRILLIC SMALL LETTER BE) 74 | 67 0x0446 ц (CYRILLIC SMALL LETTER TSE) 75 | 68 0x0434 д (CYRILLIC SMALL LETTER DE) 76 | 69 0x0435 е (CYRILLIC SMALL LETTER IE) 77 | 70 0x0444 ф (CYRILLIC SMALL LETTER EF) 78 | 71 0x0433 г (CYRILLIC SMALL LETTER GHE) 79 | 72 0x0445 х (CYRILLIC SMALL LETTER HA) 80 | 73 0x0438 и (CYRILLIC SMALL LETTER I) 81 | 74 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 82 | 75 0x043A к (CYRILLIC SMALL LETTER KA) 83 | 76 0x043B л (CYRILLIC SMALL LETTER EL) 84 | 77 0x043C м (CYRILLIC SMALL LETTER EM) 85 | 78 0x043D н (CYRILLIC SMALL LETTER EN) 86 | 79 0x043E о (CYRILLIC SMALL LETTER O) 87 | 80 0x043F п (CYRILLIC SMALL LETTER PE) 88 | 81 0x044F я (CYRILLIC SMALL LETTER YA) 89 | 82 0x0440 р (CYRILLIC SMALL LETTER ER) 90 | 83 0x0441 с (CYRILLIC SMALL LETTER ES) 91 | 84 0x0442 т (CYRILLIC SMALL LETTER TE) 92 | 85 0x0443 у (CYRILLIC SMALL LETTER U) 93 | 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 94 | 87 0x0432 в (CYRILLIC SMALL LETTER VE) 95 | 88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 96 
| 89 0x044B ы (CYRILLIC SMALL LETTER YERU) 97 | 90 0x0437 з (CYRILLIC SMALL LETTER ZE) 98 | 91 0x0448 ш (CYRILLIC SMALL LETTER SHA) 99 | 92 0x044D э (CYRILLIC SMALL LETTER E) 100 | 93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 101 | 94 0x0447 ч (CYRILLIC SMALL LETTER CHE) 102 | 95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 103 | 96 0x042E Ю (CYRILLIC CAPITAL LETTER YU) 104 | 97 0x0410 А (CYRILLIC CAPITAL LETTER A) 105 | 98 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 106 | 99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 107 | 100 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 108 | 101 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 109 | 102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 110 | 103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) 111 | 104 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 112 | 105 0x0418 И (CYRILLIC CAPITAL LETTER I) 113 | 106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 114 | 107 0x041A К (CYRILLIC CAPITAL LETTER KA) 115 | 108 0x041B Л (CYRILLIC CAPITAL LETTER EL) 116 | 109 0x041C М (CYRILLIC CAPITAL LETTER EM) 117 | 110 0x041D Н (CYRILLIC CAPITAL LETTER EN) 118 | 111 0x041E О (CYRILLIC CAPITAL LETTER O) 119 | 112 0x041F П (CYRILLIC CAPITAL LETTER PE) 120 | 113 0x042F Я (CYRILLIC CAPITAL LETTER YA) 121 | 114 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 122 | 115 0x0421 С (CYRILLIC CAPITAL LETTER ES) 123 | 116 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 124 | 117 0x0423 У (CYRILLIC CAPITAL LETTER U) 125 | 118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 126 | 119 0x0412 В (CYRILLIC CAPITAL LETTER VE) 127 | 120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 128 | 121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 129 | 122 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 130 | 123 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 131 | 124 0x042D Э (CYRILLIC CAPITAL LETTER E) 132 | 125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 133 | 126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 134 | 127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 135 | -------------------------------------------------------------------------------- /encodings/index-koi8-u.txt: 
-------------------------------------------------------------------------------- 1 | # For details on index index-koi8-u.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 19a4da2c3f245118bbc8019326f45a07832949938ff903f03d62ac4da1f61f40 5 | # Date: 2018-01-06 6 | 7 | 0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) 8 | 1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) 9 | 2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) 10 | 3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) 11 | 4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) 12 | 5 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT) 13 | 6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) 14 | 7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT) 15 | 8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) 16 | 9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) 17 | 10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) 18 | 11 0x2580 ▀ (UPPER HALF BLOCK) 19 | 12 0x2584 ▄ (LOWER HALF BLOCK) 20 | 13 0x2588 █ (FULL BLOCK) 21 | 14 0x258C ▌ (LEFT HALF BLOCK) 22 | 15 0x2590 ▐ (RIGHT HALF BLOCK) 23 | 16 0x2591 ░ (LIGHT SHADE) 24 | 17 0x2592 ▒ (MEDIUM SHADE) 25 | 18 0x2593 ▓ (DARK SHADE) 26 | 19 0x2320 ⌠ (TOP HALF INTEGRAL) 27 | 20 0x25A0 ■ (BLACK SQUARE) 28 | 21 0x2219 ∙ (BULLET OPERATOR) 29 | 22 0x221A √ (SQUARE ROOT) 30 | 23 0x2248 ≈ (ALMOST EQUAL TO) 31 | 24 0x2264 ≤ (LESS-THAN OR EQUAL TO) 32 | 25 0x2265 ≥ (GREATER-THAN OR EQUAL TO) 33 | 26 0x00A0   (NO-BREAK SPACE) 34 | 27 0x2321 ⌡ (BOTTOM HALF INTEGRAL) 35 | 28 0x00B0 ° (DEGREE SIGN) 36 | 29 0x00B2 ² (SUPERSCRIPT TWO) 37 | 30 0x00B7 · (MIDDLE DOT) 38 | 31 0x00F7 ÷ (DIVISION SIGN) 39 | 32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) 40 | 33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) 41 | 34 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE) 42 | 35 0x0451 ё (CYRILLIC SMALL LETTER IO) 43 | 36 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) 44 | 37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) 45 | 38 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) 46 | 39 
0x0457 ї (CYRILLIC SMALL LETTER YI) 47 | 40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) 48 | 41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) 49 | 42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) 50 | 43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) 51 | 44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) 52 | 45 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) 53 | 46 0x045E ў (CYRILLIC SMALL LETTER SHORT U) 54 | 47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) 55 | 48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) 56 | 49 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) 57 | 50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) 58 | 51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 59 | 52 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) 60 | 53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) 61 | 54 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) 62 | 55 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) 63 | 56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) 64 | 57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) 65 | 58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) 66 | 59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) 67 | 60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE) 68 | 61 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) 69 | 62 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) 70 | 63 0x00A9 © (COPYRIGHT SIGN) 71 | 64 0x044E ю (CYRILLIC SMALL LETTER YU) 72 | 65 0x0430 а (CYRILLIC SMALL LETTER A) 73 | 66 0x0431 б (CYRILLIC SMALL LETTER BE) 74 | 67 0x0446 ц (CYRILLIC SMALL LETTER TSE) 75 | 68 0x0434 д (CYRILLIC SMALL LETTER DE) 76 | 69 0x0435 е (CYRILLIC SMALL LETTER IE) 77 | 70 0x0444 ф (CYRILLIC SMALL LETTER EF) 78 | 71 0x0433 г (CYRILLIC SMALL LETTER GHE) 79 | 72 0x0445 х (CYRILLIC SMALL LETTER HA) 80 | 73 0x0438 и (CYRILLIC SMALL LETTER I) 81 | 74 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 82 | 75 0x043A к (CYRILLIC SMALL LETTER KA) 83 | 76 0x043B л (CYRILLIC SMALL LETTER EL) 84 | 77 0x043C м 
(CYRILLIC SMALL LETTER EM) 85 | 78 0x043D н (CYRILLIC SMALL LETTER EN) 86 | 79 0x043E о (CYRILLIC SMALL LETTER O) 87 | 80 0x043F п (CYRILLIC SMALL LETTER PE) 88 | 81 0x044F я (CYRILLIC SMALL LETTER YA) 89 | 82 0x0440 р (CYRILLIC SMALL LETTER ER) 90 | 83 0x0441 с (CYRILLIC SMALL LETTER ES) 91 | 84 0x0442 т (CYRILLIC SMALL LETTER TE) 92 | 85 0x0443 у (CYRILLIC SMALL LETTER U) 93 | 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 94 | 87 0x0432 в (CYRILLIC SMALL LETTER VE) 95 | 88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 96 | 89 0x044B ы (CYRILLIC SMALL LETTER YERU) 97 | 90 0x0437 з (CYRILLIC SMALL LETTER ZE) 98 | 91 0x0448 ш (CYRILLIC SMALL LETTER SHA) 99 | 92 0x044D э (CYRILLIC SMALL LETTER E) 100 | 93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 101 | 94 0x0447 ч (CYRILLIC SMALL LETTER CHE) 102 | 95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 103 | 96 0x042E Ю (CYRILLIC CAPITAL LETTER YU) 104 | 97 0x0410 А (CYRILLIC CAPITAL LETTER A) 105 | 98 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 106 | 99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 107 | 100 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 108 | 101 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 109 | 102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 110 | 103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) 111 | 104 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 112 | 105 0x0418 И (CYRILLIC CAPITAL LETTER I) 113 | 106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 114 | 107 0x041A К (CYRILLIC CAPITAL LETTER KA) 115 | 108 0x041B Л (CYRILLIC CAPITAL LETTER EL) 116 | 109 0x041C М (CYRILLIC CAPITAL LETTER EM) 117 | 110 0x041D Н (CYRILLIC CAPITAL LETTER EN) 118 | 111 0x041E О (CYRILLIC CAPITAL LETTER O) 119 | 112 0x041F П (CYRILLIC CAPITAL LETTER PE) 120 | 113 0x042F Я (CYRILLIC CAPITAL LETTER YA) 121 | 114 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 122 | 115 0x0421 С (CYRILLIC CAPITAL LETTER ES) 123 | 116 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 124 | 117 0x0423 У (CYRILLIC CAPITAL LETTER U) 125 | 118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 126 | 119 0x0412 В (CYRILLIC CAPITAL LETTER VE) 127 | 
120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 128 | 121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 129 | 122 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 130 | 123 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 131 | 124 0x042D Э (CYRILLIC CAPITAL LETTER E) 132 | 125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 133 | 126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 134 | 127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 135 | -------------------------------------------------------------------------------- /encodings/index-macintosh.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-macintosh.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: f2c6a4f6406b3e86a50a5dba4d2b7dd48e2e33c0d82aefe764535c934ec11764 5 | # Date: 2018-01-06 6 | 7 | 0 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 8 | 1 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 9 | 2 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 10 | 3 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 11 | 4 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 12 | 5 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 13 | 6 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 14 | 7 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 15 | 8 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 16 | 9 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 17 | 10 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 18 | 11 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 19 | 12 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 20 | 13 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 21 | 14 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 22 | 15 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 23 | 16 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 24 | 17 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 25 | 18 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 26 | 19 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 27 | 20 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 28 | 21 0x00EF ï (LATIN SMALL LETTER I WITH 
DIAERESIS) 29 | 22 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 30 | 23 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 31 | 24 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 32 | 25 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 33 | 26 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 34 | 27 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 35 | 28 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 36 | 29 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 37 | 30 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 38 | 31 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 39 | 32 0x2020 † (DAGGER) 40 | 33 0x00B0 ° (DEGREE SIGN) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A7 § (SECTION SIGN) 44 | 37 0x2022 • (BULLET) 45 | 38 0x00B6 ¶ (PILCROW SIGN) 46 | 39 0x00DF ß (LATIN SMALL LETTER SHARP S) 47 | 40 0x00AE ® (REGISTERED SIGN) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x2122 ™ (TRADE MARK SIGN) 50 | 43 0x00B4 ´ (ACUTE ACCENT) 51 | 44 0x00A8 ¨ (DIAERESIS) 52 | 45 0x2260 ≠ (NOT EQUAL TO) 53 | 46 0x00C6 Æ (LATIN CAPITAL LETTER AE) 54 | 47 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 55 | 48 0x221E ∞ (INFINITY) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x2264 ≤ (LESS-THAN OR EQUAL TO) 58 | 51 0x2265 ≥ (GREATER-THAN OR EQUAL TO) 59 | 52 0x00A5 ¥ (YEN SIGN) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x2202 ∂ (PARTIAL DIFFERENTIAL) 62 | 55 0x2211 ∑ (N-ARY SUMMATION) 63 | 56 0x220F ∏ (N-ARY PRODUCT) 64 | 57 0x03C0 π (GREEK SMALL LETTER PI) 65 | 58 0x222B ∫ (INTEGRAL) 66 | 59 0x00AA ª (FEMININE ORDINAL INDICATOR) 67 | 60 0x00BA º (MASCULINE ORDINAL INDICATOR) 68 | 61 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) 69 | 62 0x00E6 æ (LATIN SMALL LETTER AE) 70 | 63 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 71 | 64 0x00BF ¿ (INVERTED QUESTION MARK) 72 | 65 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 73 | 66 0x00AC ¬ (NOT SIGN) 74 | 67 0x221A √ (SQUARE ROOT) 75 | 68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 76 | 69 0x2248 ≈ (ALMOST EQUAL TO) 77 | 70 0x2206 ∆ (INCREMENT) 78 | 71 0x00AB « (LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK) 79 | 72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 80 | 73 0x2026 … (HORIZONTAL ELLIPSIS) 81 | 74 0x00A0   (NO-BREAK SPACE) 82 | 75 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 83 | 76 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 84 | 77 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 85 | 78 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 86 | 79 0x0153 œ (LATIN SMALL LIGATURE OE) 87 | 80 0x2013 – (EN DASH) 88 | 81 0x2014 — (EM DASH) 89 | 82 0x201C “ (LEFT DOUBLE QUOTATION MARK) 90 | 83 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 91 | 84 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 92 | 85 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 93 | 86 0x00F7 ÷ (DIVISION SIGN) 94 | 87 0x25CA ◊ (LOZENGE) 95 | 88 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 96 | 89 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 97 | 90 0x2044 ⁄ (FRACTION SLASH) 98 | 91 0x20AC € (EURO SIGN) 99 | 92 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 100 | 93 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 101 | 94 0xFB01 fi (LATIN SMALL LIGATURE FI) 102 | 95 0xFB02 fl (LATIN SMALL LIGATURE FL) 103 | 96 0x2021 ‡ (DOUBLE DAGGER) 104 | 97 0x00B7 · (MIDDLE DOT) 105 | 98 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 106 | 99 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 107 | 100 0x2030 ‰ (PER MILLE SIGN) 108 | 101 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 109 | 102 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 110 | 103 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 111 | 104 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 112 | 105 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 113 | 106 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 114 | 107 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 115 | 108 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 116 | 109 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 117 | 110 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 118 | 111 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 119 | 112 0xF8FF  () 120 | 113 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 
121 | 114 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 122 | 115 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 123 | 116 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 124 | 117 0x0131 ı (LATIN SMALL LETTER DOTLESS I) 125 | 118 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 126 | 119 0x02DC ˜ (SMALL TILDE) 127 | 120 0x00AF ¯ (MACRON) 128 | 121 0x02D8 ˘ (BREVE) 129 | 122 0x02D9 ˙ (DOT ABOVE) 130 | 123 0x02DA ˚ (RING ABOVE) 131 | 124 0x00B8 ¸ (CEDILLA) 132 | 125 0x02DD ˝ (DOUBLE ACUTE ACCENT) 133 | 126 0x02DB ˛ (OGONEK) 134 | 127 0x02C7 ˇ (CARON) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1250.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1250.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 0669455a7a1c70ba6003ea737991e8ee9adc455125c13cfe6705a361358de5fa 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0083 ƒ () 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x0088 ˆ () 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) 20 | 13 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON) 21 | 14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 22 | 15 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x0098 ˜ () 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x0161 š (LATIN SMALL LETTER S WITH 
CARON) 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) 36 | 29 0x0165 ť (LATIN SMALL LETTER T WITH CARON) 37 | 30 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 38 | 31 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x02C7 ˇ (CARON) 41 | 34 0x02D8 ˘ (BREVE) 42 | 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x02DB ˛ (OGONEK) 58 | 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 65 | 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON) 68 | 61 0x02DD ˝ (DOUBLE ACUTE ACCENT) 69 | 62 0x013E ľ (LATIN SMALL LETTER L WITH CARON) 70 | 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 71 | 64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE) 77 | 70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x010C 
Č (LATIN CAPITAL LETTER C WITH CARON) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON) 87 | 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) 88 | 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) 89 | 82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON) 96 | 89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE) 109 | 102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x011B ě (LATIN SMALL LETTER E WITH CARON) 116 | 109 
0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x010F ď (LATIN SMALL LETTER D WITH CARON) 119 | 112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 120 | 113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) 121 | 114 0x0148 ň (LATIN SMALL LETTER N WITH CARON) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x0159 ř (LATIN SMALL LETTER R WITH CARON) 128 | 121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA) 134 | 127 0x02D9 ˙ (DOT ABOVE) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1251.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1251.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 7592ef921679ba168b00a9e9afa3b4eebd67bf13dc7e84c4b6e120de856826e0 5 | # Date: 2018-01-06 6 | 7 | 0 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) 8 | 1 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x20AC € (EURO SIGN) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x040A Њ (CYRILLIC CAPITAL LETTER NJE) 20 | 13 0x040C Ќ (CYRILLIC 
CAPITAL LETTER KJE) 21 | 14 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) 22 | 15 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) 23 | 16 0x0452 ђ (CYRILLIC SMALL LETTER DJE) 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x0098 ˜ () 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x0459 љ (CYRILLIC SMALL LETTER LJE) 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x045A њ (CYRILLIC SMALL LETTER NJE) 36 | 29 0x045C ќ (CYRILLIC SMALL LETTER KJE) 37 | 30 0x045B ћ (CYRILLIC SMALL LETTER TSHE) 38 | 31 0x045F џ (CYRILLIC SMALL LETTER DZHE) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) 41 | 34 0x045E ў (CYRILLIC SMALL LETTER SHORT U) 42 | 35 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) 58 | 51 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) 59 | 52 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x0451 ё (CYRILLIC SMALL LETTER IO) 64 | 57 0x2116 № (NUMERO SIGN) 65 | 58 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x0458 ј 
(CYRILLIC SMALL LETTER JE) 68 | 61 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) 69 | 62 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) 70 | 63 0x0457 ї (CYRILLIC SMALL LETTER YI) 71 | 64 0x0410 А (CYRILLIC CAPITAL LETTER A) 72 | 65 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 73 | 66 0x0412 В (CYRILLIC CAPITAL LETTER VE) 74 | 67 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) 75 | 68 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 76 | 69 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 77 | 70 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 78 | 71 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 79 | 72 0x0418 И (CYRILLIC CAPITAL LETTER I) 80 | 73 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 81 | 74 0x041A К (CYRILLIC CAPITAL LETTER KA) 82 | 75 0x041B Л (CYRILLIC CAPITAL LETTER EL) 83 | 76 0x041C М (CYRILLIC CAPITAL LETTER EM) 84 | 77 0x041D Н (CYRILLIC CAPITAL LETTER EN) 85 | 78 0x041E О (CYRILLIC CAPITAL LETTER O) 86 | 79 0x041F П (CYRILLIC CAPITAL LETTER PE) 87 | 80 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 88 | 81 0x0421 С (CYRILLIC CAPITAL LETTER ES) 89 | 82 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 90 | 83 0x0423 У (CYRILLIC CAPITAL LETTER U) 91 | 84 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 92 | 85 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 93 | 86 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 94 | 87 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 95 | 88 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 96 | 89 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 97 | 90 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 98 | 91 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 99 | 92 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 100 | 93 0x042D Э (CYRILLIC CAPITAL LETTER E) 101 | 94 0x042E Ю (CYRILLIC CAPITAL LETTER YU) 102 | 95 0x042F Я (CYRILLIC CAPITAL LETTER YA) 103 | 96 0x0430 а (CYRILLIC SMALL LETTER A) 104 | 97 0x0431 б (CYRILLIC SMALL LETTER BE) 105 | 98 0x0432 в (CYRILLIC SMALL LETTER VE) 106 | 99 0x0433 г (CYRILLIC SMALL LETTER GHE) 107 | 100 0x0434 д (CYRILLIC SMALL LETTER DE) 108 | 101 0x0435 е (CYRILLIC SMALL LETTER IE) 109 | 102 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 110 | 103 0x0437 з 
(CYRILLIC SMALL LETTER ZE) 111 | 104 0x0438 и (CYRILLIC SMALL LETTER I) 112 | 105 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 113 | 106 0x043A к (CYRILLIC SMALL LETTER KA) 114 | 107 0x043B л (CYRILLIC SMALL LETTER EL) 115 | 108 0x043C м (CYRILLIC SMALL LETTER EM) 116 | 109 0x043D н (CYRILLIC SMALL LETTER EN) 117 | 110 0x043E о (CYRILLIC SMALL LETTER O) 118 | 111 0x043F п (CYRILLIC SMALL LETTER PE) 119 | 112 0x0440 р (CYRILLIC SMALL LETTER ER) 120 | 113 0x0441 с (CYRILLIC SMALL LETTER ES) 121 | 114 0x0442 т (CYRILLIC SMALL LETTER TE) 122 | 115 0x0443 у (CYRILLIC SMALL LETTER U) 123 | 116 0x0444 ф (CYRILLIC SMALL LETTER EF) 124 | 117 0x0445 х (CYRILLIC SMALL LETTER HA) 125 | 118 0x0446 ц (CYRILLIC SMALL LETTER TSE) 126 | 119 0x0447 ч (CYRILLIC SMALL LETTER CHE) 127 | 120 0x0448 ш (CYRILLIC SMALL LETTER SHA) 128 | 121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 129 | 122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 130 | 123 0x044B ы (CYRILLIC SMALL LETTER YERU) 131 | 124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 132 | 125 0x044D э (CYRILLIC SMALL LETTER E) 133 | 126 0x044E ю (CYRILLIC SMALL LETTER YU) 134 | 127 0x044F я (CYRILLIC SMALL LETTER YA) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1252.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1252.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: e56d49d9176e9a412283cf29ac9bd613f5620462f2a080a84eceaf974cfa18b7 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x0160 Š (LATIN CAPITAL LETTER S WITH 
CARON) 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 20 | 13 0x008D  () 21 | 14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x02DC ˜ (SMALL TILDE) 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x0161 š (LATIN SMALL LETTER S WITH CARON) 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x0153 œ (LATIN SMALL LIGATURE OE) 36 | 29 0x009D  () 37 | 30 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 38 | 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x00AA ª (FEMININE ORDINAL INDICATOR) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x00BA º (MASCULINE ORDINAL INDICATOR) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x00BF ¿ (INVERTED 
QUESTION MARK) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) 88 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 89 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) 101 | 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 
0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x00F0 ð (LATIN SMALL LETTER ETH) 120 | 113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 121 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) 133 | 126 0x00FE þ (LATIN SMALL LETTER THORN) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1253.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1253.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 49fdc881a3488904dd1e8dfba9aef3258454249958b611bcded1d4c981ab5561 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 
11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x0088 ˆ () 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x008A Š () 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x0098 ˜ () 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x009A š () 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0385 ΅ (GREEK DIALYTIKA TONOS) 41 | 34 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 50 | 44 0x00AC ¬ (NOT SIGN) 51 | 45 0x00AD ­ (SOFT HYPHEN) 52 | 46 0x00AE ® (REGISTERED SIGN) 53 | 47 0x2015 ― (HORIZONTAL BAR) 54 | 48 0x00B0 ° (DEGREE SIGN) 55 | 49 0x00B1 ± (PLUS-MINUS SIGN) 56 | 50 0x00B2 ² (SUPERSCRIPT TWO) 57 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 58 | 52 0x0384 ΄ (GREEK TONOS) 59 | 53 0x00B5 µ (MICRO SIGN) 60 | 54 0x00B6 ¶ (PILCROW SIGN) 61 | 55 0x00B7 · (MIDDLE DOT) 62 | 56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS) 63 | 57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS) 64 | 58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS) 65 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 66 | 60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS) 67 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 68 | 62 
0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS) 69 | 63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS) 70 | 64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS) 71 | 65 0x0391 Α (GREEK CAPITAL LETTER ALPHA) 72 | 66 0x0392 Β (GREEK CAPITAL LETTER BETA) 73 | 67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA) 74 | 68 0x0394 Δ (GREEK CAPITAL LETTER DELTA) 75 | 69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON) 76 | 70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA) 77 | 71 0x0397 Η (GREEK CAPITAL LETTER ETA) 78 | 72 0x0398 Θ (GREEK CAPITAL LETTER THETA) 79 | 73 0x0399 Ι (GREEK CAPITAL LETTER IOTA) 80 | 74 0x039A Κ (GREEK CAPITAL LETTER KAPPA) 81 | 75 0x039B Λ (GREEK CAPITAL LETTER LAMDA) 82 | 76 0x039C Μ (GREEK CAPITAL LETTER MU) 83 | 77 0x039D Ν (GREEK CAPITAL LETTER NU) 84 | 78 0x039E Ξ (GREEK CAPITAL LETTER XI) 85 | 79 0x039F Ο (GREEK CAPITAL LETTER OMICRON) 86 | 80 0x03A0 Π (GREEK CAPITAL LETTER PI) 87 | 81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO) 88 | 83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA) 89 | 84 0x03A4 Τ (GREEK CAPITAL LETTER TAU) 90 | 85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON) 91 | 86 0x03A6 Φ (GREEK CAPITAL LETTER PHI) 92 | 87 0x03A7 Χ (GREEK CAPITAL LETTER CHI) 93 | 88 0x03A8 Ψ (GREEK CAPITAL LETTER PSI) 94 | 89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) 95 | 90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA) 96 | 91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA) 97 | 92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS) 98 | 93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS) 99 | 94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS) 100 | 95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS) 101 | 96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS) 102 | 97 0x03B1 α (GREEK SMALL LETTER ALPHA) 103 | 98 0x03B2 β (GREEK SMALL LETTER BETA) 104 | 99 0x03B3 γ (GREEK SMALL LETTER GAMMA) 105 | 100 0x03B4 δ (GREEK SMALL LETTER DELTA) 106 | 101 0x03B5 ε (GREEK SMALL LETTER EPSILON) 107 | 102 0x03B6 ζ (GREEK SMALL LETTER ZETA) 108 | 103 0x03B7 η (GREEK SMALL LETTER ETA) 109 | 
104 0x03B8 θ (GREEK SMALL LETTER THETA) 110 | 105 0x03B9 ι (GREEK SMALL LETTER IOTA) 111 | 106 0x03BA κ (GREEK SMALL LETTER KAPPA) 112 | 107 0x03BB λ (GREEK SMALL LETTER LAMDA) 113 | 108 0x03BC μ (GREEK SMALL LETTER MU) 114 | 109 0x03BD ν (GREEK SMALL LETTER NU) 115 | 110 0x03BE ξ (GREEK SMALL LETTER XI) 116 | 111 0x03BF ο (GREEK SMALL LETTER OMICRON) 117 | 112 0x03C0 π (GREEK SMALL LETTER PI) 118 | 113 0x03C1 ρ (GREEK SMALL LETTER RHO) 119 | 114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA) 120 | 115 0x03C3 σ (GREEK SMALL LETTER SIGMA) 121 | 116 0x03C4 τ (GREEK SMALL LETTER TAU) 122 | 117 0x03C5 υ (GREEK SMALL LETTER UPSILON) 123 | 118 0x03C6 φ (GREEK SMALL LETTER PHI) 124 | 119 0x03C7 χ (GREEK SMALL LETTER CHI) 125 | 120 0x03C8 ψ (GREEK SMALL LETTER PSI) 126 | 121 0x03C9 ω (GREEK SMALL LETTER OMEGA) 127 | 122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA) 128 | 123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA) 129 | 124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS) 130 | 125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS) 131 | 126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS) 132 | -------------------------------------------------------------------------------- /encodings/index-windows-1254.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1254.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: e80a27adf377438be8ba5bd223875ea56d6a4d47f958cce1c957a2c446825caa 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING 
ANGLE QUOTATION MARK) 19 | 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x02DC ˜ (SMALL TILDE) 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x0161 š (LATIN SMALL LETTER S WITH CARON) 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x0153 œ (LATIN SMALL LIGATURE OE) 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x00AA ª (FEMININE ORDINAL INDICATOR) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x00BA º (MASCULINE ORDINAL INDICATOR) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x00BF ¿ (INVERTED QUESTION MARK) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH 
ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE) 88 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 89 | 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE) 101 | 94 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 
110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x011F ğ (LATIN SMALL LETTER G WITH BREVE) 120 | 113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 121 | 114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x0131 ı (LATIN SMALL LETTER DOTLESS I) 133 | 126 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1255.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1255.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: cd7fb43c97eefa1651084d92d02af53ad668bd848528c18c3b1af5c06b499651 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL 
ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x008A Š () 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x02DC ˜ (SMALL TILDE) 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x009A š () 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x20AA ₪ (NEW SHEQEL SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x00D7 × (MULTIPLICATION SIGN) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x00F7 ÷ (DIVISION SIGN) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x00BF ¿ (INVERTED QUESTION MARK) 71 | 64 0x05B0 ְ (HEBREW 
POINT SHEVA) 72 | 65 0x05B1 ֱ (HEBREW POINT HATAF SEGOL) 73 | 66 0x05B2 ֲ (HEBREW POINT HATAF PATAH) 74 | 67 0x05B3 ֳ (HEBREW POINT HATAF QAMATS) 75 | 68 0x05B4 ִ (HEBREW POINT HIRIQ) 76 | 69 0x05B5 ֵ (HEBREW POINT TSERE) 77 | 70 0x05B6 ֶ (HEBREW POINT SEGOL) 78 | 71 0x05B7 ַ (HEBREW POINT PATAH) 79 | 72 0x05B8 ָ (HEBREW POINT QAMATS) 80 | 73 0x05B9 ֹ (HEBREW POINT HOLAM) 81 | 74 0x05BA ֺ (HEBREW POINT HOLAM HASER FOR VAV) 82 | 75 0x05BB ֻ (HEBREW POINT QUBUTS) 83 | 76 0x05BC ּ (HEBREW POINT DAGESH OR MAPIQ) 84 | 77 0x05BD ֽ (HEBREW POINT METEG) 85 | 78 0x05BE ־ (HEBREW PUNCTUATION MAQAF) 86 | 79 0x05BF ֿ (HEBREW POINT RAFE) 87 | 80 0x05C0 ׀ (HEBREW PUNCTUATION PASEQ) 88 | 81 0x05C1 ׁ (HEBREW POINT SHIN DOT) 89 | 82 0x05C2 ׂ (HEBREW POINT SIN DOT) 90 | 83 0x05C3 ׃ (HEBREW PUNCTUATION SOF PASUQ) 91 | 84 0x05F0 װ (HEBREW LIGATURE YIDDISH DOUBLE VAV) 92 | 85 0x05F1 ױ (HEBREW LIGATURE YIDDISH VAV YOD) 93 | 86 0x05F2 ײ (HEBREW LIGATURE YIDDISH DOUBLE YOD) 94 | 87 0x05F3 ׳ (HEBREW PUNCTUATION GERESH) 95 | 88 0x05F4 ״ (HEBREW PUNCTUATION GERSHAYIM) 96 | 96 0x05D0 א (HEBREW LETTER ALEF) 97 | 97 0x05D1 ב (HEBREW LETTER BET) 98 | 98 0x05D2 ג (HEBREW LETTER GIMEL) 99 | 99 0x05D3 ד (HEBREW LETTER DALET) 100 | 100 0x05D4 ה (HEBREW LETTER HE) 101 | 101 0x05D5 ו (HEBREW LETTER VAV) 102 | 102 0x05D6 ז (HEBREW LETTER ZAYIN) 103 | 103 0x05D7 ח (HEBREW LETTER HET) 104 | 104 0x05D8 ט (HEBREW LETTER TET) 105 | 105 0x05D9 י (HEBREW LETTER YOD) 106 | 106 0x05DA ך (HEBREW LETTER FINAL KAF) 107 | 107 0x05DB כ (HEBREW LETTER KAF) 108 | 108 0x05DC ל (HEBREW LETTER LAMED) 109 | 109 0x05DD ם (HEBREW LETTER FINAL MEM) 110 | 110 0x05DE מ (HEBREW LETTER MEM) 111 | 111 0x05DF ן (HEBREW LETTER FINAL NUN) 112 | 112 0x05E0 נ (HEBREW LETTER NUN) 113 | 113 0x05E1 ס (HEBREW LETTER SAMEKH) 114 | 114 0x05E2 ע (HEBREW LETTER AYIN) 115 | 115 0x05E3 ף (HEBREW LETTER FINAL PE) 116 | 116 0x05E4 פ (HEBREW LETTER PE) 117 | 117 0x05E5 ץ (HEBREW LETTER FINAL TSADI) 118 | 118 0x05E6 צ (HEBREW LETTER TSADI) 119 | 
119 0x05E7 ק (HEBREW LETTER QOF) 120 | 120 0x05E8 ר (HEBREW LETTER RESH) 121 | 121 0x05E9 ש (HEBREW LETTER SHIN) 122 | 122 0x05EA ת (HEBREW LETTER TAV) 123 | 125 0x200E ‎ (LEFT-TO-RIGHT MARK) 124 | 126 0x200F ‏ (RIGHT-TO-LEFT MARK) 125 | -------------------------------------------------------------------------------- /encodings/index-windows-1256.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1256.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 161bdb381f16408e8bebcc8f5310c4190af0e359de8d9bbaa3628ce2f0875509 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x067E پ (ARABIC LETTER PEH) 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x0679 ٹ (ARABIC LETTER TTEH) 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 20 | 13 0x0686 چ (ARABIC LETTER TCHEH) 21 | 14 0x0698 ژ (ARABIC LETTER JEH) 22 | 15 0x0688 ڈ (ARABIC LETTER DDAL) 23 | 16 0x06AF گ (ARABIC LETTER GAF) 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x06A9 ک (ARABIC LETTER KEHEH) 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x0691 ڑ (ARABIC LETTER RREH) 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x0153 œ (LATIN SMALL LIGATURE OE) 36 | 29 0x200C ‌ (ZERO WIDTH NON-JOINER) 37 | 30 0x200D ‍ (ZERO WIDTH JOINER) 38 | 31 0x06BA ں (ARABIC LETTER NOON GHUNNA) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x060C 
، (ARABIC COMMA) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x06BE ھ (ARABIC LETTER HEH DOACHASHMEE) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x061B ؛ (ARABIC SEMICOLON) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x061F ؟ (ARABIC QUESTION MARK) 71 | 64 0x06C1 ہ (ARABIC LETTER HEH GOAL) 72 | 65 0x0621 ء (ARABIC LETTER HAMZA) 73 | 66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE) 74 | 67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE) 75 | 68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE) 76 | 69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW) 77 | 70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE) 78 | 71 0x0627 ا (ARABIC LETTER ALEF) 79 | 72 0x0628 ب (ARABIC LETTER BEH) 80 | 73 0x0629 ة (ARABIC LETTER TEH MARBUTA) 81 | 74 0x062A ت (ARABIC LETTER TEH) 82 | 75 0x062B ث (ARABIC LETTER THEH) 83 | 76 0x062C ج (ARABIC LETTER JEEM) 84 | 77 0x062D ح (ARABIC LETTER HAH) 85 | 78 0x062E خ (ARABIC LETTER KHAH) 86 | 79 0x062F د (ARABIC LETTER DAL) 87 | 80 0x0630 ذ (ARABIC LETTER THAL) 88 | 81 0x0631 ر (ARABIC LETTER REH) 89 | 82 0x0632 ز (ARABIC LETTER ZAIN) 90 | 83 0x0633 س (ARABIC LETTER SEEN) 91 | 84 0x0634 ش (ARABIC LETTER SHEEN) 92 | 85 0x0635 
ص (ARABIC LETTER SAD) 93 | 86 0x0636 ض (ARABIC LETTER DAD) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x0637 ط (ARABIC LETTER TAH) 96 | 89 0x0638 ظ (ARABIC LETTER ZAH) 97 | 90 0x0639 ع (ARABIC LETTER AIN) 98 | 91 0x063A غ (ARABIC LETTER GHAIN) 99 | 92 0x0640 ـ (ARABIC TATWEEL) 100 | 93 0x0641 ف (ARABIC LETTER FEH) 101 | 94 0x0642 ق (ARABIC LETTER QAF) 102 | 95 0x0643 ك (ARABIC LETTER KAF) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x0644 ل (ARABIC LETTER LAM) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x0645 م (ARABIC LETTER MEEM) 107 | 100 0x0646 ن (ARABIC LETTER NOON) 108 | 101 0x0647 ه (ARABIC LETTER HEH) 109 | 102 0x0648 و (ARABIC LETTER WAW) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x0649 ى (ARABIC LETTER ALEF MAKSURA) 116 | 109 0x064A ي (ARABIC LETTER YEH) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x064B ً (ARABIC FATHATAN) 120 | 113 0x064C ٌ (ARABIC DAMMATAN) 121 | 114 0x064D ٍ (ARABIC KASRATAN) 122 | 115 0x064E َ (ARABIC FATHA) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x064F ُ (ARABIC DAMMA) 125 | 118 0x0650 ِ (ARABIC KASRA) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x0651 ّ (ARABIC SHADDA) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x0652 ْ (ARABIC SUKUN) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x200E ‎ (LEFT-TO-RIGHT MARK) 133 | 126 0x200F ‏ (RIGHT-TO-LEFT MARK) 134 | 127 0x06D2 ے (ARABIC LETTER YEH BARREE) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-1257.txt: 
-------------------------------------------------------------------------------- 1 | # For details on index index-windows-1257.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: cc7256bdd10a5b8dc7fb6f994659f307dfcae60def9aa6c29d811f85e2842c47 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0083 ƒ () 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x0088 ˆ () 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x008A Š () 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x008C Œ () 20 | 13 0x00A8 ¨ (DIAERESIS) 21 | 14 0x02C7 ˇ (CARON) 22 | 15 0x00B8 ¸ (CEDILLA) 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x0098 ˜ () 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x009A š () 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x009C œ () 36 | 29 0x00AF ¯ (MACRON) 37 | 30 0x02DB ˛ (OGONEK) 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 34 0x00A2 ¢ (CENT SIGN) 41 | 35 0x00A3 £ (POUND SIGN) 42 | 36 0x00A4 ¤ (CURRENCY SIGN) 43 | 38 0x00A6 ¦ (BROKEN BAR) 44 | 39 0x00A7 § (SECTION SIGN) 45 | 40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 46 | 41 0x00A9 © (COPYRIGHT SIGN) 47 | 42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) 48 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 49 | 44 0x00AC ¬ (NOT SIGN) 50 | 45 0x00AD ­ (SOFT HYPHEN) 51 | 46 0x00AE ® (REGISTERED SIGN) 52 | 47 0x00C6 Æ (LATIN CAPITAL LETTER AE) 53 | 48 0x00B0 ° (DEGREE SIGN) 54 | 49 0x00B1 ± (PLUS-MINUS SIGN) 55 | 50 0x00B2 ² (SUPERSCRIPT TWO) 56 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 57 | 52 0x00B4 ´ 
(ACUTE ACCENT) 58 | 53 0x00B5 µ (MICRO SIGN) 59 | 54 0x00B6 ¶ (PILCROW SIGN) 60 | 55 0x00B7 · (MIDDLE DOT) 61 | 56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 62 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 63 | 58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) 64 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 65 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 66 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 67 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 68 | 63 0x00E6 æ (LATIN SMALL LETTER AE) 69 | 64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) 70 | 65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) 71 | 66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) 72 | 67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) 73 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 74 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 75 | 70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) 76 | 71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) 77 | 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) 78 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 79 | 74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) 80 | 75 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) 81 | 76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) 82 | 77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) 83 | 78 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) 84 | 79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) 85 | 80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) 86 | 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) 87 | 82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) 88 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 89 | 84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) 90 | 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) 91 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 92 | 87 0x00D7 × (MULTIPLICATION SIGN) 93 | 88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) 94 | 89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) 95 | 90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) 96 | 91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) 97 
| 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 98 | 93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) 99 | 94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) 100 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 101 | 96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) 102 | 97 0x012F į (LATIN SMALL LETTER I WITH OGONEK) 103 | 98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) 104 | 99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) 105 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 106 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 107 | 102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) 108 | 103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) 109 | 104 0x010D č (LATIN SMALL LETTER C WITH CARON) 110 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 111 | 106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) 112 | 107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) 113 | 108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) 114 | 109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) 115 | 110 0x012B ī (LATIN SMALL LETTER I WITH MACRON) 116 | 111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) 117 | 112 0x0161 š (LATIN SMALL LETTER S WITH CARON) 118 | 113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) 119 | 114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) 120 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 121 | 116 0x014D ō (LATIN SMALL LETTER O WITH MACRON) 122 | 117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) 123 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 124 | 119 0x00F7 ÷ (DIVISION SIGN) 125 | 120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) 126 | 121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) 127 | 122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) 128 | 123 0x016B ū (LATIN SMALL LETTER U WITH MACRON) 129 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 130 | 125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) 131 | 126 0x017E ž (LATIN SMALL LETTER Z WITH CARON) 132 | 127 0x02D9 ˙ (DOT ABOVE) 133 | -------------------------------------------------------------------------------- 
/encodings/index-windows-1258.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-1258.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 198bacedfcf24390e219240a7b776b6cec34cff070330b08a601a69c67f7eb24 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) 10 | 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 11 | 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x2020 † (DAGGER) 14 | 7 0x2021 ‡ (DOUBLE DAGGER) 15 | 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) 16 | 9 0x2030 ‰ (PER MILLE SIGN) 17 | 10 0x008A Š () 18 | 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) 19 | 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x02DC ˜ (SMALL TILDE) 32 | 25 0x2122 ™ (TRADE MARK SIGN) 33 | 26 0x009A š () 34 | 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) 35 | 28 0x0153 œ (LATIN SMALL LIGATURE OE) 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) 41 | 34 0x00A2 ¢ (CENT SIGN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A4 ¤ (CURRENCY SIGN) 44 | 37 0x00A5 ¥ (YEN SIGN) 45 | 38 0x00A6 ¦ (BROKEN BAR) 46 | 39 0x00A7 § (SECTION SIGN) 47 | 40 0x00A8 ¨ (DIAERESIS) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x00AA ª (FEMININE ORDINAL INDICATOR) 50 | 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 51 | 44 0x00AC ¬ (NOT SIGN) 52 | 45 0x00AD ­ (SOFT HYPHEN) 53 | 46 0x00AE ® (REGISTERED SIGN) 
54 | 47 0x00AF ¯ (MACRON) 55 | 48 0x00B0 ° (DEGREE SIGN) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x00B2 ² (SUPERSCRIPT TWO) 58 | 51 0x00B3 ³ (SUPERSCRIPT THREE) 59 | 52 0x00B4 ´ (ACUTE ACCENT) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x00B6 ¶ (PILCROW SIGN) 62 | 55 0x00B7 · (MIDDLE DOT) 63 | 56 0x00B8 ¸ (CEDILLA) 64 | 57 0x00B9 ¹ (SUPERSCRIPT ONE) 65 | 58 0x00BA º (MASCULINE ORDINAL INDICATOR) 66 | 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 67 | 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) 68 | 61 0x00BD ½ (VULGAR FRACTION ONE HALF) 69 | 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) 70 | 63 0x00BF ¿ (INVERTED QUESTION MARK) 71 | 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) 72 | 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) 73 | 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) 74 | 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) 75 | 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) 76 | 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) 77 | 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) 78 | 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) 79 | 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) 80 | 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) 81 | 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) 82 | 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) 83 | 76 0x0300 ̀ (COMBINING GRAVE ACCENT) 84 | 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) 85 | 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) 86 | 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) 87 | 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) 88 | 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) 89 | 82 0x0309 ̉ (COMBINING HOOK ABOVE) 90 | 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) 91 | 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) 92 | 85 0x01A0 Ơ (LATIN CAPITAL LETTER O WITH HORN) 93 | 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) 94 | 87 0x00D7 × (MULTIPLICATION SIGN) 95 | 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) 96 | 89 0x00D9 Ù (LATIN CAPITAL 
LETTER U WITH GRAVE) 97 | 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) 98 | 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) 99 | 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) 100 | 93 0x01AF Ư (LATIN CAPITAL LETTER U WITH HORN) 101 | 94 0x0303 ̃ (COMBINING TILDE) 102 | 95 0x00DF ß (LATIN SMALL LETTER SHARP S) 103 | 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) 104 | 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) 105 | 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) 106 | 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) 107 | 100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) 108 | 101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) 109 | 102 0x00E6 æ (LATIN SMALL LETTER AE) 110 | 103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) 111 | 104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) 112 | 105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) 113 | 106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) 114 | 107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) 115 | 108 0x0301 ́ (COMBINING ACUTE ACCENT) 116 | 109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) 117 | 110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) 118 | 111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) 119 | 112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) 120 | 113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) 121 | 114 0x0323 ̣ (COMBINING DOT BELOW) 122 | 115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) 123 | 116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) 124 | 117 0x01A1 ơ (LATIN SMALL LETTER O WITH HORN) 125 | 118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) 126 | 119 0x00F7 ÷ (DIVISION SIGN) 127 | 120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) 128 | 121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) 129 | 122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) 130 | 123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) 131 | 124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) 132 | 125 0x01B0 ư (LATIN SMALL LETTER U WITH HORN) 133 | 126 0x20AB ₫ (DONG SIGN) 134 | 127 0x00FF ÿ (LATIN SMALL LETTER Y WITH 
DIAERESIS) 135 | -------------------------------------------------------------------------------- /encodings/index-windows-874.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-windows-874.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: b416583ce125e38474381b31b401a98b19ecf2e57e0998e78a1e18b14894905d 5 | # Date: 2018-01-06 6 | 7 | 0 0x20AC € (EURO SIGN) 8 | 1 0x0081  () 9 | 2 0x0082 ‚ () 10 | 3 0x0083 ƒ () 11 | 4 0x0084 „ () 12 | 5 0x2026 … (HORIZONTAL ELLIPSIS) 13 | 6 0x0086 † () 14 | 7 0x0087 ‡ () 15 | 8 0x0088 ˆ () 16 | 9 0x0089 ‰ () 17 | 10 0x008A Š () 18 | 11 0x008B ‹ () 19 | 12 0x008C Œ () 20 | 13 0x008D  () 21 | 14 0x008E Ž () 22 | 15 0x008F  () 23 | 16 0x0090  () 24 | 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 25 | 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 26 | 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) 27 | 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 28 | 21 0x2022 • (BULLET) 29 | 22 0x2013 – (EN DASH) 30 | 23 0x2014 — (EM DASH) 31 | 24 0x0098 ˜ () 32 | 25 0x0099 ™ () 33 | 26 0x009A š () 34 | 27 0x009B › () 35 | 28 0x009C œ () 36 | 29 0x009D  () 37 | 30 0x009E ž () 38 | 31 0x009F Ÿ () 39 | 32 0x00A0   (NO-BREAK SPACE) 40 | 33 0x0E01 ก (THAI CHARACTER KO KAI) 41 | 34 0x0E02 ข (THAI CHARACTER KHO KHAI) 42 | 35 0x0E03 ฃ (THAI CHARACTER KHO KHUAT) 43 | 36 0x0E04 ค (THAI CHARACTER KHO KHWAI) 44 | 37 0x0E05 ฅ (THAI CHARACTER KHO KHON) 45 | 38 0x0E06 ฆ (THAI CHARACTER KHO RAKHANG) 46 | 39 0x0E07 ง (THAI CHARACTER NGO NGU) 47 | 40 0x0E08 จ (THAI CHARACTER CHO CHAN) 48 | 41 0x0E09 ฉ (THAI CHARACTER CHO CHING) 49 | 42 0x0E0A ช (THAI CHARACTER CHO CHANG) 50 | 43 0x0E0B ซ (THAI CHARACTER SO SO) 51 | 44 0x0E0C ฌ (THAI CHARACTER CHO CHOE) 52 | 45 0x0E0D ญ (THAI CHARACTER YO YING) 53 | 46 0x0E0E ฎ (THAI CHARACTER DO CHADA) 54 | 47 0x0E0F ฏ (THAI CHARACTER TO PATAK) 55 | 48 0x0E10 ฐ (THAI CHARACTER THO THAN) 56 | 49 0x0E11 ฑ (THAI CHARACTER THO NANGMONTHO) 57 
| 50 0x0E12 ฒ (THAI CHARACTER THO PHUTHAO) 58 | 51 0x0E13 ณ (THAI CHARACTER NO NEN) 59 | 52 0x0E14 ด (THAI CHARACTER DO DEK) 60 | 53 0x0E15 ต (THAI CHARACTER TO TAO) 61 | 54 0x0E16 ถ (THAI CHARACTER THO THUNG) 62 | 55 0x0E17 ท (THAI CHARACTER THO THAHAN) 63 | 56 0x0E18 ธ (THAI CHARACTER THO THONG) 64 | 57 0x0E19 น (THAI CHARACTER NO NU) 65 | 58 0x0E1A บ (THAI CHARACTER BO BAIMAI) 66 | 59 0x0E1B ป (THAI CHARACTER PO PLA) 67 | 60 0x0E1C ผ (THAI CHARACTER PHO PHUNG) 68 | 61 0x0E1D ฝ (THAI CHARACTER FO FA) 69 | 62 0x0E1E พ (THAI CHARACTER PHO PHAN) 70 | 63 0x0E1F ฟ (THAI CHARACTER FO FAN) 71 | 64 0x0E20 ภ (THAI CHARACTER PHO SAMPHAO) 72 | 65 0x0E21 ม (THAI CHARACTER MO MA) 73 | 66 0x0E22 ย (THAI CHARACTER YO YAK) 74 | 67 0x0E23 ร (THAI CHARACTER RO RUA) 75 | 68 0x0E24 ฤ (THAI CHARACTER RU) 76 | 69 0x0E25 ล (THAI CHARACTER LO LING) 77 | 70 0x0E26 ฦ (THAI CHARACTER LU) 78 | 71 0x0E27 ว (THAI CHARACTER WO WAEN) 79 | 72 0x0E28 ศ (THAI CHARACTER SO SALA) 80 | 73 0x0E29 ษ (THAI CHARACTER SO RUSI) 81 | 74 0x0E2A ส (THAI CHARACTER SO SUA) 82 | 75 0x0E2B ห (THAI CHARACTER HO HIP) 83 | 76 0x0E2C ฬ (THAI CHARACTER LO CHULA) 84 | 77 0x0E2D อ (THAI CHARACTER O ANG) 85 | 78 0x0E2E ฮ (THAI CHARACTER HO NOKHUK) 86 | 79 0x0E2F ฯ (THAI CHARACTER PAIYANNOI) 87 | 80 0x0E30 ะ (THAI CHARACTER SARA A) 88 | 81 0x0E31 ั (THAI CHARACTER MAI HAN-AKAT) 89 | 82 0x0E32 า (THAI CHARACTER SARA AA) 90 | 83 0x0E33 ำ (THAI CHARACTER SARA AM) 91 | 84 0x0E34 ิ (THAI CHARACTER SARA I) 92 | 85 0x0E35 ี (THAI CHARACTER SARA II) 93 | 86 0x0E36 ึ (THAI CHARACTER SARA UE) 94 | 87 0x0E37 ื (THAI CHARACTER SARA UEE) 95 | 88 0x0E38 ุ (THAI CHARACTER SARA U) 96 | 89 0x0E39 ู (THAI CHARACTER SARA UU) 97 | 90 0x0E3A ฺ (THAI CHARACTER PHINTHU) 98 | 95 0x0E3F ฿ (THAI CURRENCY SYMBOL BAHT) 99 | 96 0x0E40 เ (THAI CHARACTER SARA E) 100 | 97 0x0E41 แ (THAI CHARACTER SARA AE) 101 | 98 0x0E42 โ (THAI CHARACTER SARA O) 102 | 99 0x0E43 ใ (THAI CHARACTER SARA AI MAIMUAN) 103 | 100 0x0E44 ไ (THAI CHARACTER SARA AI MAIMALAI) 104 
| 101 0x0E45 ๅ (THAI CHARACTER LAKKHANGYAO) 105 | 102 0x0E46 ๆ (THAI CHARACTER MAIYAMOK) 106 | 103 0x0E47 ็ (THAI CHARACTER MAITAIKHU) 107 | 104 0x0E48 ่ (THAI CHARACTER MAI EK) 108 | 105 0x0E49 ้ (THAI CHARACTER MAI THO) 109 | 106 0x0E4A ๊ (THAI CHARACTER MAI TRI) 110 | 107 0x0E4B ๋ (THAI CHARACTER MAI CHATTAWA) 111 | 108 0x0E4C ์ (THAI CHARACTER THANTHAKHAT) 112 | 109 0x0E4D ํ (THAI CHARACTER NIKHAHIT) 113 | 110 0x0E4E ๎ (THAI CHARACTER YAMAKKAN) 114 | 111 0x0E4F ๏ (THAI CHARACTER FONGMAN) 115 | 112 0x0E50 ๐ (THAI DIGIT ZERO) 116 | 113 0x0E51 ๑ (THAI DIGIT ONE) 117 | 114 0x0E52 ๒ (THAI DIGIT TWO) 118 | 115 0x0E53 ๓ (THAI DIGIT THREE) 119 | 116 0x0E54 ๔ (THAI DIGIT FOUR) 120 | 117 0x0E55 ๕ (THAI DIGIT FIVE) 121 | 118 0x0E56 ๖ (THAI DIGIT SIX) 122 | 119 0x0E57 ๗ (THAI DIGIT SEVEN) 123 | 120 0x0E58 ๘ (THAI DIGIT EIGHT) 124 | 121 0x0E59 ๙ (THAI DIGIT NINE) 125 | 122 0x0E5A ๚ (THAI CHARACTER ANGKHANKHU) 126 | 123 0x0E5B ๛ (THAI CHARACTER KHOMUT) 127 | -------------------------------------------------------------------------------- /encodings/index-x-mac-cyrillic.txt: -------------------------------------------------------------------------------- 1 | # For details on index index-x-mac-cyrillic.txt see the Encoding Standard 2 | # https://encoding.spec.whatwg.org/ 3 | # 4 | # Identifier: 73e8e7642c6fa9de29d42819b47fba55b58666fb1e339faeb4a89a0bd7c24d43 5 | # Date: 2018-01-06 6 | 7 | 0 0x0410 А (CYRILLIC CAPITAL LETTER A) 8 | 1 0x0411 Б (CYRILLIC CAPITAL LETTER BE) 9 | 2 0x0412 В (CYRILLIC CAPITAL LETTER VE) 10 | 3 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) 11 | 4 0x0414 Д (CYRILLIC CAPITAL LETTER DE) 12 | 5 0x0415 Е (CYRILLIC CAPITAL LETTER IE) 13 | 6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) 14 | 7 0x0417 З (CYRILLIC CAPITAL LETTER ZE) 15 | 8 0x0418 И (CYRILLIC CAPITAL LETTER I) 16 | 9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) 17 | 10 0x041A К (CYRILLIC CAPITAL LETTER KA) 18 | 11 0x041B Л (CYRILLIC CAPITAL LETTER EL) 19 | 12 0x041C М (CYRILLIC CAPITAL LETTER EM) 20 | 13 
0x041D Н (CYRILLIC CAPITAL LETTER EN) 21 | 14 0x041E О (CYRILLIC CAPITAL LETTER O) 22 | 15 0x041F П (CYRILLIC CAPITAL LETTER PE) 23 | 16 0x0420 Р (CYRILLIC CAPITAL LETTER ER) 24 | 17 0x0421 С (CYRILLIC CAPITAL LETTER ES) 25 | 18 0x0422 Т (CYRILLIC CAPITAL LETTER TE) 26 | 19 0x0423 У (CYRILLIC CAPITAL LETTER U) 27 | 20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) 28 | 21 0x0425 Х (CYRILLIC CAPITAL LETTER HA) 29 | 22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) 30 | 23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) 31 | 24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) 32 | 25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) 33 | 26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) 34 | 27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) 35 | 28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 36 | 29 0x042D Э (CYRILLIC CAPITAL LETTER E) 37 | 30 0x042E Ю (CYRILLIC CAPITAL LETTER YU) 38 | 31 0x042F Я (CYRILLIC CAPITAL LETTER YA) 39 | 32 0x2020 † (DAGGER) 40 | 33 0x00B0 ° (DEGREE SIGN) 41 | 34 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) 42 | 35 0x00A3 £ (POUND SIGN) 43 | 36 0x00A7 § (SECTION SIGN) 44 | 37 0x2022 • (BULLET) 45 | 38 0x00B6 ¶ (PILCROW SIGN) 46 | 39 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) 47 | 40 0x00AE ® (REGISTERED SIGN) 48 | 41 0x00A9 © (COPYRIGHT SIGN) 49 | 42 0x2122 ™ (TRADE MARK SIGN) 50 | 43 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) 51 | 44 0x0452 ђ (CYRILLIC SMALL LETTER DJE) 52 | 45 0x2260 ≠ (NOT EQUAL TO) 53 | 46 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) 54 | 47 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) 55 | 48 0x221E ∞ (INFINITY) 56 | 49 0x00B1 ± (PLUS-MINUS SIGN) 57 | 50 0x2264 ≤ (LESS-THAN OR EQUAL TO) 58 | 51 0x2265 ≥ (GREATER-THAN OR EQUAL TO) 59 | 52 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) 60 | 53 0x00B5 µ (MICRO SIGN) 61 | 54 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) 62 | 55 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) 63 | 56 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) 64 | 57 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) 65 | 58 0x0407 Ї (CYRILLIC 
CAPITAL LETTER YI) 66 | 59 0x0457 ї (CYRILLIC SMALL LETTER YI) 67 | 60 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) 68 | 61 0x0459 љ (CYRILLIC SMALL LETTER LJE) 69 | 62 0x040A Њ (CYRILLIC CAPITAL LETTER NJE) 70 | 63 0x045A њ (CYRILLIC SMALL LETTER NJE) 71 | 64 0x0458 ј (CYRILLIC SMALL LETTER JE) 72 | 65 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) 73 | 66 0x00AC ¬ (NOT SIGN) 74 | 67 0x221A √ (SQUARE ROOT) 75 | 68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) 76 | 69 0x2248 ≈ (ALMOST EQUAL TO) 77 | 70 0x2206 ∆ (INCREMENT) 78 | 71 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) 79 | 72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) 80 | 73 0x2026 … (HORIZONTAL ELLIPSIS) 81 | 74 0x00A0   (NO-BREAK SPACE) 82 | 75 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) 83 | 76 0x045B ћ (CYRILLIC SMALL LETTER TSHE) 84 | 77 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE) 85 | 78 0x045C ќ (CYRILLIC SMALL LETTER KJE) 86 | 79 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) 87 | 80 0x2013 – (EN DASH) 88 | 81 0x2014 — (EM DASH) 89 | 82 0x201C “ (LEFT DOUBLE QUOTATION MARK) 90 | 83 0x201D ” (RIGHT DOUBLE QUOTATION MARK) 91 | 84 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) 92 | 85 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) 93 | 86 0x00F7 ÷ (DIVISION SIGN) 94 | 87 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) 95 | 88 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) 96 | 89 0x045E ў (CYRILLIC SMALL LETTER SHORT U) 97 | 90 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) 98 | 91 0x045F џ (CYRILLIC SMALL LETTER DZHE) 99 | 92 0x2116 № (NUMERO SIGN) 100 | 93 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) 101 | 94 0x0451 ё (CYRILLIC SMALL LETTER IO) 102 | 95 0x044F я (CYRILLIC SMALL LETTER YA) 103 | 96 0x0430 а (CYRILLIC SMALL LETTER A) 104 | 97 0x0431 б (CYRILLIC SMALL LETTER BE) 105 | 98 0x0432 в (CYRILLIC SMALL LETTER VE) 106 | 99 0x0433 г (CYRILLIC SMALL LETTER GHE) 107 | 100 0x0434 д (CYRILLIC SMALL LETTER DE) 108 | 101 0x0435 е (CYRILLIC SMALL LETTER IE) 109 | 102 0x0436 ж (CYRILLIC SMALL LETTER ZHE) 110 | 103 0x0437 з (CYRILLIC SMALL LETTER ZE) 111 | 104 
0x0438 и (CYRILLIC SMALL LETTER I) 112 | 105 0x0439 й (CYRILLIC SMALL LETTER SHORT I) 113 | 106 0x043A к (CYRILLIC SMALL LETTER KA) 114 | 107 0x043B л (CYRILLIC SMALL LETTER EL) 115 | 108 0x043C м (CYRILLIC SMALL LETTER EM) 116 | 109 0x043D н (CYRILLIC SMALL LETTER EN) 117 | 110 0x043E о (CYRILLIC SMALL LETTER O) 118 | 111 0x043F п (CYRILLIC SMALL LETTER PE) 119 | 112 0x0440 р (CYRILLIC SMALL LETTER ER) 120 | 113 0x0441 с (CYRILLIC SMALL LETTER ES) 121 | 114 0x0442 т (CYRILLIC SMALL LETTER TE) 122 | 115 0x0443 у (CYRILLIC SMALL LETTER U) 123 | 116 0x0444 ф (CYRILLIC SMALL LETTER EF) 124 | 117 0x0445 х (CYRILLIC SMALL LETTER HA) 125 | 118 0x0446 ц (CYRILLIC SMALL LETTER TSE) 126 | 119 0x0447 ч (CYRILLIC SMALL LETTER CHE) 127 | 120 0x0448 ш (CYRILLIC SMALL LETTER SHA) 128 | 121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) 129 | 122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) 130 | 123 0x044B ы (CYRILLIC SMALL LETTER YERU) 131 | 124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) 132 | 125 0x044D э (CYRILLIC SMALL LETTER E) 133 | 126 0x044E ю (CYRILLIC SMALL LETTER YU) 134 | 127 0x20AC € (EURO SIGN) 135 | -------------------------------------------------------------------------------- /encodings/make_encodings.lua: -------------------------------------------------------------------------------- 1 | -- this file should generate Lua module with mappings between 8-bit encodings and UTF-8 2 | -- source of encoding files: https://encoding.spec.whatwg.org/#legacy-single-byte-encodings 3 | -- each encoding starts at value of 128 - first 127 characters are the same as ASCII 4 | local lfs = require "lfs" 5 | 6 | -- insert replacement char at empty fields 7 | local replacement_char = utf8.char(0xFFFD) 8 | 9 | local function load_enc(filename) 10 | local enc = {} 11 | local i = 0 12 | local last_pos 13 | for line in io.lines(filename) do 14 | local pos, uni_char = line:match("^%s*(%d+)%s*0x(%x+)") 15 | if pos then 16 | pos = tonumber(pos) 17 | if i < pos then 18 | for x = i, pos - 1 do 19 | 
enc[#enc+1] = replacement_char 20 | end 21 | i = pos 22 | end 23 | enc[#enc+1] = utf8.char(tonumber(uni_char, 16)) 24 | i = i + 1 25 | last_pos = pos 26 | end 27 | end 28 | return table.concat(enc) 29 | end 30 | 31 | local dir = "encodings" 32 | 33 | local encodings = {} 34 | local named_encodings = {} 35 | for file in lfs.dir(dir) do 36 | local curr_enc = file:match("^index%-(.-)%.txt") 37 | if curr_enc then 38 | local encoding = load_enc(dir .. "/" .. file) 39 | encodings[#encodings+1] = { 40 | name = curr_enc, 41 | encoding = encoding 42 | } 43 | named_encodings[curr_enc] = encoding 44 | end 45 | end 46 | 47 | if arg[1] then 48 | -- try to translate from the encoding given in the argument to utf-8" 49 | local enc = named_encodings[arg[1]] 50 | if not enc then 51 | print("unknown encoding", arg[1]) 52 | os.exit() 53 | end 54 | -- prepare mapping from 8-bit chars to UTF-8 55 | local mapping = {} 56 | local i = 128 57 | for pos, charpoint in utf8.codes(enc) do 58 | mapping[i] = utf8.char(charpoint) 59 | i = i + 1 60 | end 61 | -- read testing string from stdin 62 | local str = io.read("*all") 63 | -- convert string 64 | local newstr = str:gsub("(.)", function(char) 65 | local charpoint = string.byte(char) 66 | if charpoint > 127 then 67 | print(char, charpoint, mapping[charpoint]) 68 | return mapping[charpoint] 69 | else 70 | return false 71 | end 72 | end) 73 | print(newstr) 74 | 75 | else 76 | print "return {" 77 | 78 | for k,v in ipairs(encodings) do 79 | print("['" .. v.name .. "'] = '" .. v.encoding .. "',") 80 | end 81 | 82 | print "}" 83 | end 84 | -------------------------------------------------------------------------------- /examples/simple.lua: -------------------------------------------------------------------------------- 1 | local cssobj = require "luaxml-cssquery" 2 | local domobj = require "luaxml-domobject" 3 | 4 | local xmltext = [[ 5 | 6 | 7 |

Header

8 |

Some text, italics

9 | 10 | 11 | ]] 12 | 13 | local dom = domobj.parse(xmltext) 14 | local css = cssobj() 15 | 16 | css:add_selector("h1", function(obj) 17 | print("header found: " .. obj:get_text()) 18 | end) 19 | 20 | css:add_selector("p", function(obj) 21 | print("paragraph found: " .. obj:get_text()) 22 | end) 23 | 24 | css:add_selector("i", function(obj) 25 | print("found italics: " .. obj:get_text()) 26 | end) 27 | 28 | dom:traverse_elements(function(el) 29 | -- find selectors that match the current element 30 | local querylist = css:match_querylist(el) 31 | -- add templates to the element 32 | css:apply_querylist(el,querylist) 33 | end) 34 | -------------------------------------------------------------------------------- /examples/xmltotex.lua: -------------------------------------------------------------------------------- 1 | local cssobj = require "luaxml-cssquery" 2 | local domobj = require "luaxml-domobject" 3 | local lpeg = require "lpeg" 4 | 5 | local xml = [[ 6 | 7 | 8 |

Header

9 |

First paragraph& some\\ bad characters

10 |

Second paragraph

--- Escape characters that are special in LaTeX.
-- Only `&` and `\` are handled; more should be added.
-- @param s text to escape
-- @return escaped text (a single value, gsub's match count is dropped)
local function tex_escape(s)
  local codes = {["&"] = "\\&{}", ["\\"] = "\\textbackslash{}"}
  -- a table replacement is looked up by the matched character and inserted
  -- literally; the outer parentheses keep only the resulting string
  return (s:gsub("[&\\]", codes))
end

--- Return the text unchanged.
-- @param s text
-- @return the same text
local function identity_escape(s)
  return s
end

--- Replace every run of two or more whitespace characters by the first
-- character of the run.
-- @param s text
-- @return text with collapsed whitespace (a single value)
local function collapsed_ws(s)
  return (s:gsub("(%s)%s+", "%1"))
end

--- Declare new template.
-- template should contain <.> placeholder, which will be replaced
-- by content of the matched element.
-- The template is appended to the `template` list of every object that the
-- selector callback is called with.
-- @param selector CSS selector
-- @param template template string
local function add_template(selector, template)
  css:add_selector(selector, function(obj, parameters)
    local t = obj.template or {}
    t[#t+1] = parameters.template
    obj.template = t
  end, {template=template})
end

--- Insert content into a template.
-- @param template string with the `<.>` placeholder
-- @param content text that replaces each placeholder
-- @param element matched element (not used at the moment)
-- @return the filled template (a single value)
local function apply_template(template, content, element)
  -- the dot is escaped, so only the literal placeholder is replaced and not
  -- things like <p>; content is returned from a function because a
  -- replacement string would interpret every % in it as a capture reference
  return (template:gsub("<%.>", function() return content end))
end
css:apply_querylist(el,querylist) 72 | end) 73 | 74 | local function serialize_tex(el) 75 | local t = {} 76 | 77 | if el:is_text() then 78 | local text = tex_escape(el._text) 79 | table.insert(t, text) 80 | else 81 | -- print(el.pre) 82 | -- local pre = el.pre or {} 83 | -- for _, x in ipairs(pre) do 84 | -- table.insert(t,x) 85 | -- end 86 | local current_nodes = {} 87 | for _, x in ipairs(el:get_children()) do 88 | -- we need to give special handling to text nodes, because we may want 89 | -- them escaped, verbatim, or with preserved whitespace 90 | local current = {} 91 | if x:is_text() then 92 | current.type = "text" 93 | local text = x._text 94 | local escaped = tex_escape(text) 95 | current.verbatim = text 96 | current.collapsed = collapsed_ws(escaped) -- this is used by default, 97 | -- with escaped special sequences and collapsed whitescpace 98 | current.escaped = escaped 99 | else 100 | current.type = "node" 101 | current.content = serialize_tex(x) 102 | end 103 | table.insert(current_nodes, current) 104 | end 105 | -- local content = table.concat(h) 106 | local templates = el.template or {} 107 | local content = {} 108 | -- only escaped and collapsed text is added at the moment. 
--- Escape characters that are special in LaTeX.
-- Runs of whitespace are collapsed to a single space first.
-- @param s value to escape
-- @return escaped string; values that are not strings are returned unchanged
local function escape(s)
  -- NOTE(review): \backspace is not a LaTeX kernel macro; presumably the
  -- document that inputs the generated file defines it - confirm, or use
  -- \textbackslash{} instead
  local escapes = {
    _ = "\\_{}",
    ["\\"] = "\\backspace{}",
    ["%"] = "\\%",
    ["{"] = "\\{",
    ["}"] = "\\}",
  }
  -- only process strings
  if type(s) ~= "string" then return s end
  s = s:gsub("%s+", " ")
  -- table replacement: the value is inserted literally; the parentheses drop
  -- gsub's match count, so the result is safe in argument lists
  return (s:gsub("[_\\%%{}]", escapes))
end

--- Print the escaped value formatted by the given format string.
-- @param format string.format template with one placeholder
-- @param s value to escape and insert
local function print_template(format, s)
  print(string.format(format, escape(s)))
end

--- Print the name and summary of a module.
-- @param mod module table (fields `mod_name` and `summary` are used)
local function print_module(mod)
  print_template("\\modulename{%s}", mod.mod_name)
  print_template("\\modulesummary{%s}", mod.summary)
end

--- Print a class heading followed by all its functions.
-- @param mod module table (not used at the moment)
-- @param class heading text
-- @param items list of function items; may be nil
local function print_class(mod, class, items)
  print_template("\\moduleclass{%s}", class)
  for _, item in ipairs(items or {}) do
    -- items without parameters may have no params table at all
    local params = item.params or {}
    local map = params.map or {}
    local par = {}
    for _, name in ipairs(params) do
      par[#par+1] = escape(name)
    end
    print(string.format("\\functionname{%s}{%s}", escape(item.name), table.concat(par, ", ")))
    print_template("\\functionsummary{%s}", item.summary)
    for _, name in ipairs(params) do
      -- parameter without description: print empty text instead of "nil"
      print(string.format("\\functionparam{%s}{%s}", escape(name), escape(map[name] or "")))
    end
    for _, ret in ipairs(item.ret or {}) do
      print_template("\\functionreturn{%s}", ret)
    end
  end
end

--- Print LaTeX markup for every module in the list.
-- Functions are grouped by their `section`; groups are printed in the order
-- in which they first appear in the module.
-- @param t list of module tables
local function filter(t)
  for _, mod in ipairs(t) do
    -- print basic information about module
    print_module(mod)
    -- both tables are per module, classes of one module must not be
    -- printed again for the next one
    local classes = {}
    local class_sequence = {}
    for _, item in ipairs(mod.items) do
      if item.type == "function" or item.type == "lfunction" then
        -- move functions to tables corresponding to their classes
        local curr_class = item.section
        if curr_class then
          local class = classes[curr_class]
          if not class then
            class = {}
            classes[curr_class] = class
            -- we want to list classes in the order as they appear in the module
            class_sequence[#class_sequence+1] = curr_class
          end
          class[#class+1] = item
        end
      end
    end
    for _, k in ipairs(class_sequence) do
      local title = k
      if k == "lfunction" then
        title = "Local functions"
      elseif k == "function" then
        title = "Functions"
      end
      -- print class info and functions
      print_class(mod, title, classes[k])
    end
  end
end

return {
  filter = filter
}
'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№¤■ ', 8 | ['iso-8859-5'] = '�������������������������������� ЁЂЃЄЅІЇЈЉЊЋЌ­ЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ', 9 | ['macintosh'] = 'ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄€‹›fifl‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ', 10 | ['windows-1251'] = 'ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏђ‘’“”•–—�™љ›њќћџ ЎўЈ¤Ґ¦§Ё©Є«¬­®Ї°±Ііґµ¶·ё№є»јЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя', 11 | ['iso-8859-13'] = '�������������������������������� ”¢£¤„¦§Ø©Ŗ«¬­®Æ°±²³“µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽßąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž’', 12 | ['iso-8859-6'] = '�������������������������������� ���¤�������،­�������������؛���؟�ءآأؤإئابةتثجحخدذرزسشصضطظعغ�����ـفقكلمنهوىيًٌٍَُِّْ', 13 | ['iso-8859-8'] = '�������������������������������� �¢£¤¥¦§¨©×«¬­®¯°±²³´µ¶·¸¹÷»¼½¾��������������������������������‗אבגדהוזחטיךכלםמןנסעףפץצקרשת��‎‏', 14 | ['iso-8859-4'] = '�������������������������������� ĄĸŖ¤Ĩϧ¨ŠĒĢŦ­Ž¯°ą˛ŗ´ĩšēģŧŊžŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖרŲÚÛÜŨŪßāáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙', 15 | ['koi8-r'] = '─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥ ⌡°²·÷═║╒ё╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ', 16 | ['iso-8859-7'] = '�������������������������������� ‘’£€₯¦§¨©ͺ«¬­�―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ�ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ', 17 | ['iso-8859-3'] = '�������������������������������� Ħ˘£¤�Ĥ§¨İŞĞĴ­�ݰħ²³´µĥ·¸ışğĵ½�żÀÁÂ�ÄĊĈÇÈÉÊËÌÍÎÏ�ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ�äċĉçèéêëìíîï�ñòóôġö÷ĝùúûüŭŝ˙', 18 | ['windows-1256'] = '€پ‚ƒ„…†‡ˆ‰ٹ‹Œچژڈگ‘’“”•–—ک™ڑ›œ‌‍ں ،¢£¤¥¦§¨©ھ«¬­®¯°±²³´µ¶·¸¹؛»¼½¾؟ہءآأؤإئابةتثجحخدذرزسشصض×طظعغـفقكàلâمنهوçèéêëىيîïًٌٍَôُِ÷ّùْûü‎‏ے', 19 | ['windows-1258'] = '€�‚ƒ„…†‡ˆ‰�‹Œ����‘’“”•–—˜™�›œ��Ÿ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂĂÄÅÆÇÈÉÊË̀ÍÎÏĐÑ̉ÓÔƠÖרÙÚÛÜỮßàáâăäåæçèéêë́íîïđṇ̃óôơö÷øùúûüư₫ÿ', 20 | ['iso-8859-14'] = 
'�������������������������������� Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ­®ŸḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏŴÑÒÓÔÕÖṪØÙÚÛÜÝŶßàáâãäåæçèéêëìíîïŵñòóôõöṫøùúûüýŷÿ', 21 | ['windows-1252'] = '€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', 22 | ['iso-8859-15'] = '�������������������������������� ¡¢£€¥Š§š©ª«¬­®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', 23 | ['x-mac-cyrillic'] = 'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ†°Ґ£§•¶І®©™Ђђ≠Ѓѓ∞±≤≥іµґЈЄєЇїЉљЊњјЅ¬√ƒ≈∆«»… ЋћЌќѕ–—“”‘’÷„ЎўЏџ№Ёёяабвгдежзийклмнопрстуфхцчшщъыьэю€', 24 | ['windows-1254'] = '€�‚ƒ„…†‡ˆ‰Š‹Œ����‘’“”•–—˜™š›œ��Ÿ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖרÙÚÛÜİŞßàáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ', 25 | ['windows-1255'] = '€�‚ƒ„…†‡ˆ‰�‹�����‘’“”•–—˜™�›���� ¡¢£₪¥¦§¨©×«¬­®¯°±²³´µ¶·¸¹÷»¼½¾¿ְֱֲֳִֵֶַָֹֺֻּֽ־ֿ׀ׁׂ׃װױײ׳״�������אבגדהוזחטיךכלםמןנסעףפץצקרשת��‎‏', 26 | ['windows-1250'] = '€�‚�„…†‡�‰Š‹ŚŤŽŹ�‘’“”•–—�™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙', 27 | ['koi8-u'] = '─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥ ⌡°²·÷═║╒ёє╔ії╗╘╙╚╛ґў╞╟╠╡ЁЄ╣ІЇ╦╧╨╩╪ҐЎ©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ', 28 | ['iso-8859-10'] = '�������������������������������� ĄĒĢĪĨͧĻĐŠŦŽ­ŪŊ°ąēģīĩķ·ļđšŧž―ūŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞßāáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ', 29 | ['windows-1257'] = '€�‚�„…†‡�‰�‹�¨ˇ¸�‘’“”•–—�™�›�¯˛� �¢£¤�¦§Ø©Ŗ«¬­®Æ°±²³´µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽßąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž˙', 30 | ['windows-1253'] = '€�‚ƒ„…†‡�‰�‹�����‘’“”•–—�™�›���� ΅Ά£¤¥¦§¨©�«¬­®―°±²³΄µ¶·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ�ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ', 31 | ['iso-8859-2'] = '�������������������������������� Ą˘Ł¤ĽŚ§¨ŠŞŤŹ­ŽŻ°ą˛ł´ľśˇ¸šşťź˝žżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙', 32 | ['windows-874'] = '€����…�����������‘’“”•–—�������� กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู����฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛', 33 | 
['iso-8859-16'] = '�������������������������������� ĄąŁ€„Чš©Ș«Ź­źŻ°±ČłŽ”¶·žčș»ŒœŸżÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚßàáâăäćæçèéêëìíîïđńòóôőöśűùúûüęțÿ', 34 | } 35 | 36 | 37 | local utfchar = utf8.char 38 | 39 | --- Try to find an encoding in HTML string 40 | ---@param str string HTML document 41 | ---@param len number count of characters from the start of the string where it should search for the encoding metadata 42 | ---@return string encoding identifier, or nil and message if no encoding was found 43 | local function find_html_encoding(str, len) 44 | -- try to find encoding in the html document 45 | -- we limit search length, because encoding should be in the document head, ideally near the start 46 | local len = len or 4096 47 | local sub = str:sub(1, len) 48 | for meta in sub:gmatch("") do 49 | local charset = meta:match("charset%s*=%s*[\"']?(.-)[\"']") 50 | if charset then return string.lower(charset) end 51 | end 52 | return nil, "Cannot find the document encoding" 53 | end 54 | 55 | local function load_mapping(enc_name) 56 | local enc_name = enc_name or "" 57 | local enc = encodings[enc_name] 58 | if not enc then return nil, "Cannot load encoding " .. 
local M = {}
local char = unicode and unicode.utf8.char or utf8.char
local named_entities
if kpse then
  named_entities = require "luaxml-namedentities"
else
  named_entities = require "luaxml.namedentities"
end
-- caches of already converted numeric entities, indexed by the digit string
local hexchartable = {}
local decchartable = {}


--- Look up a named entity.
-- @param name entity name without `&` and `;`
-- @return value stored for the name in the named entities table, or nil
local function get_named_entity(name)
  return named_entities[name]
end

-- currently unused, kept for the disabled check in get_entity
local function test_invalid_unicode(charnumber)
  return charnumber > 127 and charnumber < 256
end

--- Convert the digits of a numeric entity to a character.
-- @param charcode digit string
-- @param chartable cache for the given base
-- @param base 10 or 16
-- @return character, or nil when the digits are not a number in the given
-- base or the number cannot be converted to a character
local function get_entity(charcode, chartable, base)
  local character = chartable[charcode]
  if character then return character end
  -- nil for digits that are invalid in the base, for example "12ab" in base 10
  local charnumber = tonumber(charcode, base)
  if not charnumber then return nil end
  -- if test_invalid_unicode(charnumber) then
  --   return nil
  -- end
  -- the conversion function raises an error for values out of its range
  local ok, converted = pcall(char, charnumber)
  if not ok or type(converted) ~= "string" then return nil end
  chartable[charcode] = converted
  return converted
end


--- Replace named and numeric entities in a string.
-- Entities that are not known or not valid are kept as they are.
-- @param s string with entities
-- @return string with replaced entities, followed by the number of entity
-- candidates that were processed (the two results of string.gsub)
function M.decode(s)
  return s:gsub("&([#a-zA-Z0-9%_%:%-]-);", function(m)
    local original_entity = "&" .. m .. ";"
    -- check if this is named entity first
    local named = get_named_entity(m)
    if named then return named end
    -- check if it is numeric entity; the whole name must be the number
    local hex, charcode = m:match("^#([xX]?)(%x+)$")
    -- if the entity is not numeric
    if not charcode then
      return original_entity
    end
    local character
    if hex ~= "" then
      character = get_entity(charcode, hexchartable, 16)
    else
      character = get_entity(charcode, decchartable, 10)
    end
    return character or original_entity
  end)
end

return M
[[\textbf{%s}]]) 34 | xmltransform:add_action("tt", [[\texttt{%s}]]) 35 | xmltransform:add_action("samp", [[\texttt{%s}]]) 36 | xmltransform:add_action("kbd", [[\texttt{%s}]]) 37 | xmltransform:add_action("var", [[\textit{%s}]]) 38 | xmltransform:add_action("dfn", [[\texttt{%s}]]) 39 | xmltransform:add_action("code", [[\texttt{%s}]]) 40 | xmltransform:add_action("a[href]", [[\textit{%s}\protect\footnote{\texttt{@{href}}}]]) 41 | 42 | 43 | local itemize = [[ 44 | \begin{itemize} 45 | %s 46 | \end{itemize} 47 | ]] 48 | xmltransform:add_action("ul", itemize) 49 | xmltransform:add_action("menu", itemize) 50 | xmltransform:add_action("ol", [[ 51 | \begin{enumerate} 52 | %s 53 | \end{enumerate} 54 | ]]) 55 | 56 | xmltransform:add_action("dl", [[ 57 | \begin{description} 58 | %s 59 | \end{description} 60 | ]]) 61 | 62 | 63 | xmltransform:add_action("li", "\\item %s\n") 64 | xmltransform:add_action("dt", "\\item[%s] ") 65 | 66 | local quote = [[ 67 | \begin{quotation} 68 | %s 69 | \end{quotation} 70 | ]] 71 | 72 | xmltransform:add_action("blockquote", quote) 73 | xmltransform:add_action("q", "\\enquote{%s}") 74 | xmltransform:add_action("abbr", "%s\\protect\\footnote{@{title}}") 75 | xmltransform:add_action("sup", "\\textsuperscript{%s}") 76 | xmltransform:add_action("sub", "\\textsubscript{%s}") 77 | 78 | xmltransform:add_action("table", [[ 79 | \begin{calstable} 80 | %s 81 | \end{calstable} 82 | ]]) 83 | 84 | xmltransform:add_action("tr", "\\brow %s \\erow") 85 | xmltransform:add_action("td", "\\cell{%s}") 86 | xmltransform:add_action("th", "\\cell{%s}") 87 | 88 | 89 | -- this is the original code for verbatim, but I changed LuaXML to not escape characters in verbatim, 90 | -- so we can use the verbatim environment 91 | xmltransform:add_action("pre", [[{\parindent=0pt\obeylines\ttfamily\catcode`\ =\active\def {\ }\catcode`\#=11%% 92 | %s} 93 | 94 | ]], {verbatim=true}) 95 | xmltransform:add_action("pre *", [[%s]]) 96 | 97 | -- 98 | xmltransform:add_action("pre", [[ 99 | 
-- Source: https://github.com/leafo/web_sanitize
-- Author: Leaf Corcoran
local lpeg = require("lpeg")
local R, S, P = lpeg.R, lpeg.S, lpeg.P
local C, Ct = lpeg.C, lpeg.Ct

local alphanum = R("az", "AZ", "09")
local num = R("09")
local quotes = S("'\"") ^ 1
local white = S(" \t\n") ^ 0
-- this is a deviation from the upstream, we allow "|" in the tag name, because
-- luaxml doesn't support XML namespaces and elements must be queried using
-- dom:query_selector("namespace|element")
local word = (alphanum + S("_-") + S("|")) ^ 1
local attr_word = (alphanum + S("_-") + S("|:")) ^ 1

local combinators = S(">~+")

local attr_name = (alphanum + S("_-:")) ^ 1
local attr_function = S("~|^$*") ^ 0

local attr_content = C((P(1) - quotes) ^ 1)

--- Make a capture handler that tags its captures.
-- @param name tag stored at the first position
-- @return function that returns a table with the name followed by all the
-- values it was called with
local function mark(name)
  return function(...)
    return { name, ... }
  end
end

--- Parse a CSS selector list.
-- @param query selector string; alternatives are separated by commas
-- @return result of matching the grammar against the whole query: a list of
-- alternatives, each a list of selectors, each a list of marked parts
local function parse_query(query)
  local tag = word / mark("tag")
  local cls = P(".") * (word / mark("class"))
  local id = P("#") * (word / mark("id"))
  local any = P("*") / mark("any")
  local nth = P(":nth-child(") * C(num ^ 1) * ")" / mark("nth-child")
  local first = P(":first-child") / mark("first-child")
  local first_of_type = P(":first-of-type") / mark("first-of-type")
  local last = P(":last-child") / mark("last-child")
  local last_of_type = P(":last-of-type") / mark("last-of-type")
  local attr = P("[") * C(attr_word) * P("]") / mark("attr")
  local attr_value = P("[") * C(attr_name) * C(attr_function) * P("=")
    * quotes * attr_content * quotes * P("]") / mark("attr_value")
  local combinator = C(combinators) / mark("combinator")

  -- the order of the alternatives matters, the first one that matches wins
  local part = any + nth + first + first_of_type + last + last_of_type
    + tag + cls + id + attr + attr_value + combinator
  local selector = Ct(part ^ 1)
  local chain = Ct(selector * (white * selector) ^ 0)
  local list = Ct(chain * (white * P(",") * white * chain) ^ 0)
  -- P(-1) succeeds only at the end of the input
  return (list * white * P(-1)):match(query)
end

return {
  parse_query = parse_query
}
http://mini.net/cgi-bin/lua/44.html
4 | -- This was extracted from utility code in "util.lua".
5 | -- 23/02/2001 jcw@equi4.com
6 | -- Pretty displays a value, properly dealing with tables and cycles 7 | 8 | local displayvalue= 9 | function (s) 10 | if not s or type(s)=='function' or type(s)=='userdata' then 11 | s=tostring(s) 12 | elseif type(s)~='number' then 13 | s=string.gsub(string.format('%q',s),'^"([^"\']*)"$',"'%1'") 14 | end 15 | return s 16 | end 17 | 18 | local askeystr= 19 | function (u,s) 20 | if type(u)=='string' and string.find(u,'^[%w_]+$') then return s..u end 21 | return '['..displayvalue(u)..']' 22 | end 23 | 24 | local horizvec= 25 | function (x,n) 26 | local o,e='','' 27 | for i=1,#x do 28 | if type(x[i])=='table' then return end 29 | o=o..e..displayvalue(x[i]) 30 | if string.len(o)>n then return end 31 | e=',' 32 | end 33 | return '('..o..')' 34 | end 35 | 36 | local horizmap= 37 | function (x,n) 38 | local o,e='','' 39 | for k,v in pairs(x) do 40 | if type(v)=='table' then return end 41 | o=o..e..askeystr(k,'')..'='..displayvalue(v) 42 | if string.len(o)>n then return end 43 | e=',' 44 | end 45 | return '{'..o..'}' 46 | end 47 | local M = {} 48 | local function pretty(p,x,h,q) 49 | if not p then p,x='globals',globals() end 50 | if type(x)=='table' then 51 | if not h then h={} end 52 | if h[x] then 53 | x=h[x] 54 | else 55 | if not q then q=p end 56 | h[x]=q 57 | local s={} 58 | for k,v in pairs(x) do table.insert(s,k) end 59 | if #s>0 then 60 | local n=75-string.len(p) 61 | local f=#s==#x and horizvec(x,n) 62 | if not f then f=horizmap(x,n) end 63 | if not f then 64 | table.sort(s,function (a,b) 65 | --if tag(a)~=tag(b) then a,b=tag(b),tag(a) end 66 | if type(a)~=type(b) then a,b=type(b),type(a) end 67 | return a= 5.3", 8 | "lpeg >= 1.0.2", 9 | "dkjson" 10 | } 11 | description = { 12 | summary = "LuaXML is pure lua library for reading and serializing of the XML files.", 13 | detailed = [[ 14 | LuaXML is pure lua library for reading and serializing of the XML files. Current release is aimed mainly as support 15 | for the odsfile package. 
The documentation was created by automatic conversion of original documentation in the source code. 16 | In this version, some files not useful for luaTeX were dropped. ]], 17 | homepage = "https://github.com/michal-h21/LuaXML", 18 | license = "MIT" 19 | } 20 | build = { 21 | type = "builtin", 22 | modules = { 23 | ["luaxml.cssquery"] = "luaxml-cssquery.lua", 24 | ["luaxml.domobject"] = "luaxml-domobject.lua", 25 | ["luaxml.entities"] = "luaxml-entities.lua", 26 | ["luaxml.encodings"] = "luaxml-encodings.lua", 27 | ["luaxml.mod-handler"] = "luaxml-mod-handler.lua", 28 | ["luaxml.mod-html"] = "luaxml-mod-html.lua", 29 | ["luaxml.mod-xml"] = "luaxml-mod-xml.lua", 30 | ["luaxml.namedentities"] = "luaxml-namedentities.lua", 31 | ["luaxml.parse-query"] = "luaxml-parse-query.lua", 32 | ["luaxml.pretty"] = "luaxml-pretty.lua", 33 | ["luaxml.stack"] = "luaxml-stack.lua", 34 | ["luaxml.testxml"] = "luaxml-testxml.lua", 35 | ["luaxml.transform"] = "luaxml-transform.lua" 36 | }, 37 | build_command = [[ 38 | lua data/jsontolua.lua < data/entities.json > luaxml-namedentities.lua 39 | ]] 40 | } 41 | -------------------------------------------------------------------------------- /test/cssquery-test.lua: -------------------------------------------------------------------------------- 1 | require "busted.runner" () 2 | kpse.set_program_name "luatex" 3 | 4 | local dom = require "luaxml-domobject" 5 | 6 | local cssquery = require "luaxml-cssquery" 7 | 8 | local obj = cssquery() 9 | -- obj:debug() 10 | 11 | describe("CSS selector handling", function() 12 | local selector = "div#pokus span.ahoj, p, div.ahoj:first-child" 13 | local objects = obj:prepare_selector(selector) 14 | it("should parse selectors", function() 15 | assert.same(#objects, 3) 16 | end) 17 | it("should calculate specificity", function() 18 | assert.same(obj:calculate_specificity(objects[1]), 112) 19 | end) 20 | local document = [[ 21 | 22 | 23 |
24 | first child 25 | Pokus 26 |

Uff

27 | Something different 28 |
29 | 30 | 31 | ]] 32 | local newobj = dom.parse(document) 33 | local matchedlist = obj:get_selector_path(newobj, objects) 34 | it("should get selector path",function() 35 | assert.same(#matchedlist, 3) 36 | end) 37 | describe("List selectors that matches object", function() 38 | -- this should match two elements with "ahoj" class 39 | obj:add_selector(".ahoj", function(domobj) 40 | domobj:set_attribute("style", "color:green") 41 | return false 42 | end) 43 | -- but the one with "pokus" id should block to use the class match 44 | obj:add_selector("#pokus", function(domobj) 45 | domobj:set_attribute("style", "color:red") 46 | return false 47 | end) 48 | -- Rule for #pokus should be first in the selectors list 49 | it("Automatic specificity sorting should work", function() 50 | assert.same(obj.querylist[1].specificity, 100) 51 | end) 52 | local span_ahoj = newobj:query_selector "span.ahoj" [1] 53 | local div_ahoj = newobj:query_selector "div.ahoj" [1] 54 | it("query_selector should work", function() 55 | assert.same(span_ahoj:get_element_name(), "span") 56 | assert.same(div_ahoj:get_element_name(), "div") 57 | end) 58 | it("Saved query matches should work", function() 59 | assert.same(#obj:match_querylist(span_ahoj), 1) 60 | -- should match .ahoj and #pokus 61 | assert.same(#obj:match_querylist(div_ahoj), 2) 62 | end) 63 | it("Applying querylist should work", function() 64 | local div_querylist = obj:match_querylist(div_ahoj) 65 | obj:apply_querylist(div_ahoj, div_querylist) 66 | assert.same(div_ahoj:get_attribute("style"), "color:red") 67 | end) 68 | it("Any selector should work", function() 69 | local div_any = newobj:query_selector("div *") 70 | local body_any = newobj:query_selector("body *") 71 | local body_direct_any = newobj:query_selector("body > *") 72 | assert.same(#div_any, 4) 73 | assert.same(#body_any, 5) 74 | assert.same(#body_direct_any, 1) 75 | end) 76 | -- for k,v in ipairs(obj.querylist) do 77 | -- print(k, v.source, v.specificity) 78 | -- end 79 | 
end) 80 | -- assert.truthy(#obj:prepare_selector(selector)==2) 81 | end) 82 | 83 | describe("pseudo-classes", function() 84 | local sample = [[ 85 | 86 | 87 | foo 88 | bar 89 | baz 90 | last 91 | 92 | 93 | ]] 94 | local nth = [[ 95 | data 96 | items 97 | item:nth-child(2) 98 | ]] 99 | 100 | local first = "item:first-child" 101 | local dom = dom.parse(sample) 102 | local css = cssquery() 103 | 104 | css:add_selector(nth, function(obj) 105 | assert.equal(obj:get_text(), "bar") 106 | end) 107 | 108 | css:add_selector(first, function(obj) 109 | assert.equal(obj:get_text(), "foo") 110 | end) 111 | 112 | css:add_selector("items :last-child", function(obj) 113 | assert.equal(obj:get_text(), "last") 114 | end) 115 | 116 | -- this shouldn't match 117 | local last_item_matched = false 118 | -- item is not last child, ite element't doesn't exist 119 | css:add_selector("item:last-child, ite :last-child", function(obj) 120 | last_item_matched = true 121 | end) 122 | 123 | 124 | it("Should match pseudo classes", function() 125 | dom:traverse_elements(function(el) 126 | local querylist = css:match_querylist(el) 127 | css:apply_querylist(el, querylist) 128 | end) 129 | assert.equal(last_item_matched, false) 130 | end) 131 | 132 | it("Should match first-of-type", function() 133 | local first_type = dom:query_selector("another:first-of-type") 134 | assert.equal(#first_type, 1) 135 | assert.equal(first_type[1]:get_text(), "last") 136 | end) 137 | 138 | it("Should match last-of-type", function() 139 | local last = dom:query_selector("item:last-of-type") 140 | assert.equal(#last, 1) 141 | assert.equal(last[1]:get_text(), "baz") 142 | end) 143 | 144 | end) 145 | 146 | 147 | describe("attribute selectors", function() 148 | local sample = [[ 149 |

150 | link to hello 151 | hello 152 | czech text 153 | test word 154 | test start 155 |

156 | ]] 157 | local dom = dom.parse(sample) 158 | local css = cssquery() 159 | 160 | local function asserttext(obj, text) 161 | assert.equal(obj:get_text(), text) 162 | end 163 | css:add_selector("a[href]", function(obj) asserttext(obj, "link to hello") end) 164 | css:add_selector("[id='hello']", function(obj) asserttext(obj, "hello") end) 165 | css:add_selector("[lang|='cs']", function(obj) asserttext(obj, "czech text") end) 166 | css:add_selector("[class~='world']", function(obj) asserttext(obj, "test word") end) 167 | css:add_selector("[id^='very']", function(obj) asserttext(obj, "test start") end) 168 | css:add_selector("[id$='word']", function(obj) asserttext(obj, "test start") end) 169 | css:add_selector("[id*='long']", function(obj) asserttext(obj, "test start") end) 170 | it("Should match attributes", function() 171 | dom:traverse_elements(function(el) 172 | local querylist = css:match_querylist(el) 173 | css:apply_querylist(el, querylist) 174 | end) 175 | end) 176 | 177 | end) 178 | 179 | describe("combinators", function() 180 | local sample = [[ 181 |

182 | hello 183 | link to hello 184 | czech text 185 | test word 186 | child content ignore this 187 | test start 188 |

189 | ]] 190 | 191 | local dom = dom.parse(sample) 192 | local css = cssquery() 193 | 194 | -- test how many span elements match the sibling combinator 195 | local number_of_spans = 0 196 | css:add_selector("a ~ span", function(obj) 197 | number_of_spans = number_of_spans + 1 198 | end) 199 | 200 | local span_after_b = 0 201 | css:add_selector("b + span", function(obj) 202 | span_after_b = span_after_b + 1 203 | end) 204 | 205 | local deep_child = "" 206 | 207 | css:add_selector("p i", function(obj) 208 | deep_child = obj:get_text() 209 | end) 210 | 211 | it("Should match combinators", function() 212 | dom:traverse_elements(function(el) 213 | local querylist = css:match_querylist(el) 214 | css:apply_querylist(el, querylist) 215 | end) 216 | assert.same(number_of_spans, 3) 217 | assert.same(span_after_b, 1) 218 | assert.same(deep_child, "ignore this") 219 | end) 220 | 221 | it("Should support selector removing", function() 222 | assert.same(#css.querylist, 3) 223 | css:remove_selector("a ~ span") 224 | assert.same(#css.querylist, 2) 225 | end) 226 | 227 | end) 228 | -------------------------------------------------------------------------------- /test/dom-test.lua: -------------------------------------------------------------------------------- 1 | require "busted.runner" () 2 | kpse.set_program_name "luatex" 3 | 4 | local dom = require "luaxml-domobject" 5 | 6 | describe("Basic DOM functions", function() 7 | local document = [[ 8 | 9 | pokus 10 | 11 |

pokus

12 |

nazdar

13 |
14 |
15 | 16 | 17 | ]] 18 | 19 | local obj = dom.parse(document) 20 | it("It should parse XML", function() 21 | assert.same(type(obj), "table") 22 | assert.truthy(obj:root_node()) 23 | end) 24 | 25 | it("Path retrieving should work", function() 26 | local path = obj:get_path("html body") 27 | assert.truthy(path) 28 | assert.same(#path, 1) 29 | assert.truthy(path[1]:is_element()) 30 | assert.same(#path[1]:get_children(), 9) 31 | end) 32 | 33 | describe("Node traversing should work", function() 34 | it("Should get all nodes", function() 35 | local t = {} 36 | obj:traverse(function(node) 37 | t[#t+1] = node 38 | end) 39 | assert.same(#t, 21) 40 | end) 41 | it("Should get stripped strings", function() 42 | assert.same(#obj:stripped_strings(), 3) 43 | end) 44 | it("Should get all strings", function() 45 | assert.same(#obj:strings(),12) 46 | end) 47 | end) 48 | 49 | 50 | describe("Basic DOM traversing should work", function() 51 | local matched = false 52 | local count = 0 53 | obj:traverse_elements(function(el) 54 | count = count + 1 55 | if obj:get_element_name(el) == "p" then 56 | matched = true 57 | it("Element matching should work", function() 58 | assert.same(el:root_node():get_node_type(), "ROOT") 59 | assert.truthy(el:is_element()) 60 | assert.same(el:get_element_name(), "p") 61 | end) 62 | it("Node serializing should work", function() 63 | local p_serialize = el:serialize() 64 | assert.same(p_serialize, "

nazdar

") 65 | end) 66 | it("Adding text elements should work", function() 67 | local newtext = el:create_text_node(" světe") 68 | el:add_child_node(newtext) 69 | assert.same(el:serialize(), "

nazdar světe

") 70 | end) 71 | el:remove_node(el) 72 | end 73 | end) 74 | it("Traverse should find 7 elements and match one

", function() 75 | assert.truthy(matched) 76 | assert.same(count, 9) 77 | end) 78 | end) 79 | 80 | describe("Modified DOM object serializing", function() 81 | local serialized = obj:serialize() 82 | assert.truthy(serialized) 83 | assert.same(type(serialized), "string") 84 | assert.truthy(serialized:match("")) 85 | assert.is_nil(serialized:match("

")) 86 | end) 87 | 88 | 89 | describe("Query selector matching should work", function() 90 | local document = [[ 91 | 92 | pokus 93 | 94 |

pokus

95 |

nazdar

96 |

First noindent

97 |

Second noindent

98 | 99 | 100 | ]] 101 | local newobj = dom.parse(document) 102 | local matched = newobj:query_selector(".noindent") 103 | it("Should return table", function() 104 | assert.same(type(matched), "table") 105 | end) 106 | it("Should match two elements", function() 107 | assert.same(#matched, 2) 108 | end) 109 | local el = matched[2] 110 | it("Should be possible to add new elements to the matched elements",function() 111 | local text = newobj:create_text_node(" with added text") 112 | el:add_child_node(text) 113 | assert.same(el:serialize(),'

Second noindent with added text

') 114 | end) 115 | 116 | end) 117 | describe("Text retrieving should work", function() 118 | local document = [[ 119 | 120 | 121 |

pokus

122 |

First noindent 123 | some another text. More text. 124 |

125 | 126 | 127 | ]] 128 | local newobj = dom.parse(document) 129 | local matched = newobj:query_selector(".noindent") 130 | it("Should return table", function() 131 | assert.same(type(matched), "table") 132 | end) 133 | it("Should have one element", function() 134 | assert.same(#matched, 1) 135 | end) 136 | local par = matched[1] 137 | local text = par:get_text() 138 | it("Should return element's text content", function() 139 | assert.truthy(text:match( "First noindent\nsome another text. More text.")) 140 | end) 141 | end) 142 | describe("Handling of void elements", function() 143 | local test_metas = function(metas, msg) 144 | it(msg, function() 145 | assert.same(type(metas), "table") 146 | assert.same(#metas, 2) 147 | end) 148 | end 149 | 150 | local document = [[ 151 | 152 | 153 | 154 | 155 | 156 | 157 | ]] 158 | local newobj = dom.parse(document) 159 | local metas = newobj:query_selector("meta") 160 | test_metas(metas,"Should match two meta elements") 161 | -- test configuration of void elements 162 | local second = [[ 163 | 164 | Hello 165 | world 166 | 167 | ]] 168 | local newobj = dom.parse(second, {}) 169 | local metas = newobj:query_selector("meta") 170 | test_metas(metas,"Should support configuration of the void elements") 171 | end) 172 | describe("Inner HTML", function() 173 | local document = [[ 174 |

hello

175 | 176 | ]] 177 | local newdom = dom.html_parse(document) 178 | local p = newdom:query_selector("p")[1] 179 | -- insert inner_html as XML 180 | p:inner_html("hello this should be the new content", true) 181 | it("Should support inner_html", function() 182 | local children = p:get_children() 183 | assert.same(#children, 3) 184 | assert.truthy(children[1]:is_text()) 185 | assert.same(children[1]._text,"hello ") 186 | assert.truthy(children[2]:is_element()) 187 | assert.same(children[2]._name,"b") 188 | -- now insert inner_html as HTML 189 | p:inner_html("hello this should be the new content") 190 | children = p:get_children() 191 | assert.same(#children, 3) 192 | end) 193 | local text = [[ 194 |

hello, here are some tags

195 | 196 | ]] 197 | local newdom = dom.html_parse(text) 198 | local b = newdom:query_selector("b")[1] 199 | it("Should support insert_before_begin", function() 200 | b:insert_before_begin("here are more tags") 201 | local siblings = b:get_siblings() 202 | local pos = b:find_element_pos() 203 | assert.same(siblings[pos - 2]._text, "here ") 204 | assert.same(siblings[pos - 1]._name, "x") 205 | end) 206 | it("Should support insert_after_end", function() 207 | b:insert_after_end(", here are even more tags") 208 | local siblings = b:get_siblings() 209 | local pos = b:find_element_pos() 210 | assert.same(siblings[pos + 1]._text, ", here are even ") 211 | assert.same(siblings[pos + 2]._name, "y") 212 | end) 213 | it("Should support insert_after_begin", function() 214 | b:insert_after_begin("try even more, ") 215 | local children = b:get_children() 216 | assert.same(children[1]._text, "try ") 217 | assert.same(children[2]._name, "i") 218 | end) 219 | it("Should support insert_before_end", function() 220 | b:insert_before_end(", some tags at the end") 221 | local children = b:get_children() 222 | assert.same(children[#children-1]._name, "i") 223 | assert.same(children[#children]._text, " at the end") 224 | end) 225 | 226 | end) 227 | 228 | end) 229 | -------------------------------------------------------------------------------- /test/entities-test.lua: -------------------------------------------------------------------------------- 1 | require "busted.runner" () 2 | kpse.set_program_name "luatex" 3 | local entities = require "luaxml-entities" 4 | 5 | describe("Entities decoding should work",function() 6 | local decode = entities.decode 7 | it("should parse named entities", function() 8 | assert.same(decode("&"), "&") 9 | assert.same(decode("<"), "<") 10 | assert.same(decode("""), '"') 11 | assert.same(decode(" "), "\n") 12 | end) 13 | it("should parse decimal entities", function() 14 | assert.same(decode("@"), "@") 15 | end) 16 | it("should parse hexa entities", 
function() 17 | assert.same(decode("č"), "č") 18 | end) 19 | 20 | end) 21 | -------------------------------------------------------------------------------- /test/transform-test.lua: -------------------------------------------------------------------------------- 1 | require "busted.runner" () 2 | kpse.set_program_name "luatex" 3 | 4 | local domobject = require "luaxml-domobject" 5 | local transform = require "luaxml-transform" 6 | 7 | describe("Basic DOM functions", function() 8 | local transformer1 = transform.new() 9 | local transformer2 = transform.new() 10 | local text="hello" 11 | transformer1:add_action("b", "transform1: %s") 12 | transformer2:add_action("b", "transform2: %s") 13 | it("should do basic transformations", function() 14 | assert.same("transform1: hello", transformer1:parse_xml(text)) 15 | end) 16 | it("should support multiple transformer objects", function() 17 | assert.same("transform2: hello", transformer2:parse_xml(text)) 18 | end) 19 | end) 20 | 21 | describe("Transform DOM object", function() 22 | local transformer = transform.new() 23 | local dom = domobject.parse [[
hello world
]] 24 | transformer:add_action("section", "sect: %s") 25 | transformer:add_action("b", "b: %s") 26 | it("should transform dom object", function() 27 | assert.same("sect: hello b: world", transformer:process_dom(dom)) 28 | end) 29 | end) 30 | 31 | describe("Selectors support", function() 32 | local transformer = transform.new() 33 | local dom1 = domobject.parse [[hello world]] 34 | local dom2 = domobject.parse [[hello world]] 35 | local dom3 = domobject.parse [[hello world]] 36 | local dom4 = domobject.parse [[hello world]] 37 | 38 | 39 | transformer:add_action("x b", "xb: %s") 40 | transformer:add_action("v b", "vb: %s") 41 | transformer:add_action(".hello", "hello: %s") 42 | -- try the alternative syntax for content 43 | transformer:add_action("#id", "id: @<.>") 44 | it("should support css selectors", function() 45 | assert.same("hello xb: world", transformer:process_dom(dom1)) 46 | assert.same("hello vb: world", transformer:process_dom(dom2)) 47 | assert.same("hello hello: world", transformer:process_dom(dom3)) 48 | assert.same("hello id: world", transformer:process_dom(dom4)) 49 | end) 50 | 51 | end) 52 | 53 | describe("Function test", function() 54 | local transformer = transform.new() 55 | local dom1 = domobject.parse [[hello world]] 56 | transformer:add_custom_action("b", function(el) 57 | return "fn: " ..el:get_text() 58 | end) 59 | it("should support function transformers", function() 60 | assert.same("hello fn: world", transformer:process_dom(dom1)) 61 | end) 62 | local dom2 = domobject.parse [[worldhello, ]] 63 | local transformer = transform.new() 64 | local get_child_element = transform.get_child_element 65 | local process_children = transform.process_children 66 | transformer:add_custom_action("x", function(el) 67 | local first = process_children(get_child_element(el, 1)) 68 | local second = process_children(get_child_element(el, 2)) 69 | return second .. 
first 70 | end) 71 | it("should correctly transform children",function() 72 | assert.same("hello, world", transformer:process_dom(dom2)) 73 | end) 74 | -- try the new syntax 75 | local transformer2 = transform.new() 76 | transformer2:add_action("x", "@<2>@<1>") 77 | it("should correctly transform children using the @ syntax", function() 78 | assert.same("hello, world", transformer2:process_dom(dom2)) 79 | end) 80 | 81 | end) 82 | 83 | describe("Attribute conversion", function() 84 | local transformer = transform.new() 85 | local dom1 = domobject.parse [[hello world]] 86 | transformer:add_action("b", "%s") 87 | transformer:add_action("b[style]", "s=@{style} %s") 88 | it("should transform attributes", function() 89 | assert.same("hello s=red world", transformer:process_dom(dom1)) 90 | end) 91 | end) 92 | 93 | describe("Escapes", function() 94 | local transformer1 = transform.new() 95 | local transformer2 = transform.new() 96 | local dom1 = domobject.parse [[{}&]] 97 | -- reset unicodes table in the second object 98 | transformer2.unicodes = {} 99 | it("should correctly escape special characters", function() 100 | assert.same('\\{\\}\\&', transformer1:process_dom(dom1)) 101 | -- the second object shouldn't escape special characters 102 | assert.same('{}&', transformer2:process_dom(dom1)) 103 | end) 104 | 105 | end) 106 | 107 | describe("children selection templates", function() 108 | local transformer = transform.new() 109 | local dom = domobject.parse "hello insignificant world" 110 | transformer:add_action("x", "@", {separator=", "}) 111 | it("selects just elements", function() 112 | assert.same("hello, world", transformer:process_dom(dom)) 113 | end) 114 | end) 115 | 116 | 117 | describe("support pseudo classes", function() 118 | local transformer = transform.new() 119 | local dom = domobject.parse "hello, world" 120 | transformer:add_action("a:last-child", "last") 121 | it("select last child", function() 122 | assert.same("hello, last", 
transformer:process_dom(dom)) 123 | end) 124 | end) 125 | 126 | describe("children transformation", function() 127 | local transformer = transform.new() 128 | local dom = domobject.parse "a+bab" 129 | transformer:add_action("mfrac","\\frac{@<1>}{@<2>}") 130 | -- transformer:add_action("mfrac","\\frac{@<.>}") 131 | it("correctly process child elements", function() 132 | assert.same("\\frac{a+b}{\\frac{a}{b}}",transformer:process_dom(dom)) 133 | end) 134 | 135 | end) 136 | 137 | describe("remove action", function() 138 | local transformer = transform.new() 139 | local dom = domobject.parse "first, second" 140 | transformer:add_action("a", "hello") 141 | transformer:add_action("b", "world") 142 | it("should change words", function() 143 | assert.same("hello, world", transformer:process_dom(dom)) 144 | end) 145 | it("should change only the first word", function() 146 | -- remove action for the element 147 | transformer:reset_actions("b") 148 | assert.same("hello, second", transformer:process_dom(dom)) 149 | end) 150 | end) 151 | --------------------------------------------------------------------------------