├── Cask ├── README.md ├── test └── ids-test.el ├── ids.el ├── ids-normalize.el └── ids-equiv.el /Cask: -------------------------------------------------------------------------------- 1 | (source melpa) 2 | 3 | (package "ids" "0.0.1" "IDS utilities.") 4 | (depends-on "dash") 5 | (depends-on "cask-package-toolset") 6 | 7 | (development 8 | (depends-on "ert-runner")) 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IDS check program. 2 | 3 | ## Usage: 4 | 5 | ``` 6 | M-x load-file ids-db 7 | ``` 8 | 9 | Then, 10 | 11 | ``` 12 | (ids-normalize "_IDS_Sequence_") 13 | ``` 14 | 15 | will produce normalized IDS sequences. 16 | -------------------------------------------------------------------------------- /test/ids-test.el: -------------------------------------------------------------------------------- 1 | ;;; ids-test.el --- test program for IDS tools. -*- lexical-binding: t; -*- 2 | 3 | ;; Copyright (C) 2014 KAWABATA, Taichi 4 | 5 | ;; Author: KAWABATA, Taichi 6 | 7 | ;;; Commentary: 8 | 9 | ;; % cask exec ert-runner -L . 

;;; Code:

(require 'ert)
(require 'bytecomp)
(require 'ids-db)
(require 'dash)

;; `ids-db-init' comes from ids-db.el (not shown in this file); it is
;; called once at load time, before any test runs.  Presumably it fills
;; the lookup tables consulted by `ids-normalize' -- confirm in ids-db.el.
(ids-db-init)

;;;; Tests for ids.el

(ert-deftest ids-split-string ()
  "A run of three consecutive IDS is cut into its three components."
  (let ((pieces (ids-split-string "⿰氵⿱艹⿻口夫⿱宀子文")))
    (should (equal pieces
                   (list "⿰氵⿱艹⿻口夫" "⿱宀子" "文")))))

(ert-deftest ids-tree-structure ()
  "A nested IDS string is parsed into a nested list of characters."
  (let ((tree (ids-tree-structure "⿰氵⿱𠂉母")))
    (should (equal tree
                   (list ?⿰ ?氵 (list ?⿱ ?𠂉 ?母))))))

;;;; Tests for ids-normalize.el

(ert-deftest ids-normalize-structure ()
  "The operator ⿶ is rewritten to ⿱ with its two operands swapped."
  (let ((tree (ids-normalize-structure "⿶AB")))
    (should (equal (apply #'string tree)
                   "⿱BA"))))

(ert-deftest ids-normalize-rotate ()
  "Left-nested chains of one operator come out right-nested."
  (let ((first-form (car (ids-normalize "⿰⿰⿰⿰ABCD⿱⿱⿱⿱EFGHI"))))
    (should (equal first-form
                   "⿰A⿰B⿰C⿰D⿱E⿱F⿱G⿱HI"))))

(ert-deftest ids-normalize-shrink ()
  "Both IDS must share at least one normalized form."
  (let ((candidates (list (ids-normalize "⿱⿳士冖王心")
                          (ids-normalize "⿱⿳士冖一志"))))
    (should (-reduce #'-intersection candidates))))

(ert-deftest ids-normalize-all ()
  "Every IDS listed here must normalize to a set containing 敪."
  (let ((candidates (list (ids-normalize "⿰⿱双双攵")
                          (ids-normalize "⿰㕛𢼅")
                          (ids-normalize "⿲㕛㕛攵")
                          (ids-normalize "⿰叕攵")
                          (list "敪"))))
    (should (-reduce #'-intersection candidates))))

(provide 'ids-test)

;;; ids-test.el ends here
;; --------------------------------------------------------------------------------
;; /ids.el:
;; --------------------------------------------------------------------------------
;;; ids.el --- IDS (Ideographic Description Sequence) processing utilities -*- lexical-binding: t; -*-

;; Copyright (C) 2014 KAWABATA, Taichi

;; Author: KAWABATA, Taichi
;; Keywords: data

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; Basic functions for IDS (Ideographic Description Sequence) processing

;;; Code:

(require 'dash)

(declare-function ids-debug "ids-debug")

(defun ids-forward-char ()
  "Parse one IDS at point, move point past it and return its tree.
An operator matching \"[⿰⿱⿴⿵⿶⿷⿸⿹⿺⿻]\" consumes two operands and one
matching \"[⿲⿳]\" consumes three; the result is then a list
\(OPERATOR OPERAND...) whose operands are parsed recursively.  Any
other character is returned as is.
Signal an error if the buffer ends before the IDS is complete."
  (interactive)
  (cond ((looking-at "[⿰⿱⿴⿵⿶⿷⿸⿹⿺⿻]")
         (forward-char)
         ;; `list' evaluates its arguments left to right, so the operator
         ;; is read via `char-before' before the operands move point.
         (list (char-before (point))
               (ids-forward-char) (ids-forward-char)))
        ((looking-at "[⿲⿳]")
         (forward-char)
         (list (char-before (point))
               (ids-forward-char) (ids-forward-char) (ids-forward-char)))
        ((eobp) (error "Incomplete IDS! %s" (buffer-string)))
        (t (forward-char) (char-before (point)))))

(defun ids-split-string (string)
  "Split STRING into a list of the consecutive IDS it contains.
Each element is the substring covered by one complete IDS (a single
character counts as an IDS).  Signal an error if STRING ends in the
middle of an IDS."
  (with-temp-buffer
    (insert string)
    (goto-char (point-min))
    (let ((start (point))
          result)
      (while (not (eobp))
        (ids-forward-char)
        (push (buffer-substring start (point)) result)
        (setq start (point)))
      (nreverse result))))

(defun ids-tree-structure (ids)
  "Return the tree structure of the IDS string IDS.
Only the first IDS in the string is parsed; see `ids-forward-char'
for the shape of the returned tree."
  (with-temp-buffer
    (insert ids)
    (goto-char (point-min))
    (ids-forward-char)))

(defsubst ids-idc (ids)
  "Return the first element (the operator) of IDS tree, or nil if not a list."
  (if (listp ids) (car ids)))

(defsubst ids-left (ids)
  "Return the second element (first operand) of IDS tree, or nil if not a list."
  (if (listp ids) (cadr ids)))

(defsubst ids-center (ids)
  "Return the third element of IDS tree when it has exactly four elements.
Return nil otherwise."
  ;; `nth' instead of `caddr': the latter is not in subr.el before Emacs 26.
  (if (and (listp ids) (= 4 (length ids))) (nth 2 ids)))

(defsubst ids-right (ids)
  "Return the last element of IDS tree, or nil if IDS is not a list."
  (if (listp ids) (car (last ids))))

(provide 'ids)
;;; ids.el ends here
;; --------------------------------------------------------------------------------
;; /ids-normalize.el:
;; --------------------------------------------------------------------------------
;;; ids-normalize.el --- IDS Normalization Tools -*- lexical-binding: t; -*-

;; Copyright (C) 2014 KAWABATA, Taichi

;; Author: KAWABATA, Taichi
;; Keywords: tools

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; A tool to normalize IDS string.

;;; Code:

(require 'cl-lib)                       ; `cl-labels'
(require 'dash)                         ; `--tree-map', `--mapcat', `-uniq', ...
(require 'ids)

;; following variables are initialized by ids-db.el
;; As used below: `ids-normalize-table' maps a character to a list whose
;; elements are characters or IDS trees; `ids-reverse-table' is looked up
;; with an IDS tree and yields a list; `ids-canonical-table' maps a
;; character to its replacement.
;; NOTE(review): the hash-table tests and exact contents are set up in
;; ids-db.el, which is not visible here -- confirm there.
(defvar ids-normalize-table)
(defvar ids-reverse-table)
(defvar ids-canonical-table)

(defun ids-normalize-structure (ids)
  "Normalize the operators of IDS string IDS and return an IDS tree.
In the string, ⿵ ⿸ ⿹ are replaced by ⿱ and ⿷ ⿺ by ⿰.  In the parsed
tree, (⿶ A B) becomes (⿱ B A), (⿲ A B C) becomes (⿰ A (⿰ B C)) and
\(⿳ A B C) becomes (⿱ A (⿱ B C)).  Other operators are kept."
  (setq ids (replace-regexp-in-string "[⿵⿸⿹]" "⿱" ids))
  (setq ids (replace-regexp-in-string "[⿷⿺]" "⿰" ids))
  (setq ids (ids-tree-structure ids))
  (cl-labels
      ((proc (it)
             (cond ((not (listp it)) it)
                   ((equal (car it) ?⿶)
                    ;; operands are swapped on purpose
                    (list ?⿱ (proc (elt it 2)) (proc (elt it 1))))
                   ((equal (car it) ?⿲)
                    `(?⿰ ,(proc (elt it 1))
                         (?⿰ ,(proc (elt it 2)) ,(proc (elt it 3)))))
                   ((equal (car it) ?⿳)
                    `(?⿱ ,(proc (elt it 1))
                         (?⿱ ,(proc (elt it 2)) ,(proc (elt it 3)))))
                   ;; any other operator: keep it, recurse into operands
                   ;; (the operator itself is a character, returned as is)
                   (t (mapcar #'proc it)))))
    (proc ids)))

(defun ids-normalize-canonicalize (ids-tree)
  "Replace characters in IDS-TREE to canonical ones.
For each leaf, the first element of its Unicode `decomposition'
property is used when that is a different character; otherwise the
entry in `ids-canonical-table' is used if there is one; otherwise the
leaf is kept."
  (--tree-map
   (let ((decomposition (car (get-char-code-property it 'decomposition))))
     (if (and (not (equal decomposition it)) (characterp decomposition))
         decomposition
       (or (gethash it ids-canonical-table) it)))
   ids-tree))

(defun ids-normalize-rotate (ids-tree)
  "Rotate IDS-TREE so that chains of one operator nest to the right.
Return a list of IDS trees.  Signal an error if IDS-TREE is a
character.  Only the operator and the first two operands of IDS-TREE
are used."
  (if (characterp ids-tree) (error "Not Tree! %c" ids-tree))
  (let ((head (car ids-tree))
        (left (elt ids-tree 1))
        (right (elt ids-tree 2))
        left-trees)
    ;; shrink left tree if possible.
    ;; (only when its operator differs from HEAD; a left subtree with the
    ;; same operator is left alone so it can be rotated below)
    (setq left-trees
          (if (and (listp left)
                   (not (equal head (car left))))
              (ids-normalize-shrink left)
            (list left)))
    ;; expand left tree if possible
    ;; (a character is replaced by its decompositions headed by HEAD)
    (setq left-trees
          (-uniq
           (--mapcat
            (if (characterp it) (ids-normalize-expand it head) (list it))
            left-trees)))
    ;; rotate: (HEAD (HEAD a b) RIGHT) -> (HEAD a (HEAD b RIGHT))
    (-uniq
     (--mapcat
      (if (and (listp it) (equal (car it) head))
          (ids-normalize-rotate `(,head ,(elt it 1) (,head ,(elt it 2) ,right)))
        (mapcar (lambda (shrunk-right) (list head it shrunk-right))
                (ids-normalize-shrink right)))
      left-trees))))

(defun ids-normalize-shrink (ids-tree)
  "Shrink an IDS-TREE.  Return a list of possible IDS trees.
For a character, return it together with the character entries of its
`ids-normalize-table' value.  For a tree, rotate it with
`ids-normalize-rotate' and return the `ids-reverse-table' entries of
the rotated trees, or the rotated trees themselves when there are none.
e.g. ⿰氵毎 → (海),
     ⿺走斗 → (﨣 𧺯)"
  (if (characterp ids-tree)
      (--filter (characterp it)
                (-uniq (cons ids-tree (gethash ids-tree ids-normalize-table))))
    (let ((ids-trees (ids-normalize-rotate ids-tree)))
      (or
       (-uniq
        (--mapcat (gethash it ids-reverse-table)
                  ids-trees))
       ids-trees))))

(defun ids-normalize-expand (char ids-head)
  "Return IDS trees of CHAR which can be expanded with IDS-HEAD.
The `ids-normalize-table' entries of CHAR are taken, with character
entries replaced by their own table entries (one level only), and
those that are trees whose operator is IDS-HEAD are returned.
If it can't be expanded, return list of CHAR."
  (or
   (--filter
    (and (listp it) (equal (car it) ids-head))
    (-uniq
     (--mapcat
      (if (characterp it) (gethash it ids-normalize-table) (list it))
      (gethash char ids-normalize-table))))
   (list char)))

(defun ids-normalize (ids)
  "Normalize IDS string.
Returns a list of IDS string.
When called interactively, read IDS from the minibuffer and also show
the result in the echo area."
  ;; A bare (interactive) supplies no argument, so M-x on this
  ;; one-argument function signalled wrong-number-of-arguments.
  (interactive "sIDS: ")
  (let* ((ids-tree
          (ids-normalize-canonicalize
           (ids-normalize-structure ids)))
         (ids-trees (ids-normalize-shrink ids-tree))
         (result (ids-normalize-to-strings ids-trees)))
    (when (called-interactively-p 'any)
      (message "%s" (mapconcat #'identity result " ")))
    result))

(defun ids-normalize-to-strings (ids-trees)
  "Convert IDS-TREES to a list of String.
Each tree is flattened and its characters are joined in order."
  (--map (apply #'string (-flatten it)) ids-trees))

(provide 'ids-normalize)

;;; ids-normalize.el ends here
;; --------------------------------------------------------------------------------
;; /ids-equiv.el:
;; --------------------------------------------------------------------------------
;;; ids-equiv.el --- IDS character equivalence tool -*- lexical-binding: t; -*-

;; Copyright (C) 2014 KAWABATA, Taichi

;; Author: KAWABATA, Taichi

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
11 | 12 | ;; This program is distributed in the hope that it will be useful, 13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | ;; GNU General Public License for more details. 16 | 17 | ;; You should have received a copy of the GNU General Public License 18 | ;; along with this program. If not, see . 19 | 20 | ;;; Commentary: 21 | 22 | ;; IDS canonicalization and equivalence database. 23 | 24 | ;;; Code: 25 | 26 | (defvar ids-canonicals) 27 | (setq 28 | ids-canonicals 29 | ;; Characters can be canonicalized when: 30 | ;; (1) characters can be equally decomposed (e.g. "土 vs. 士") 31 | ;; (2) Counterpart pairs can not be decomposed furthermore (e.g. "八 vs. 丷") 32 | ;; (3) Character may contain equivalent character inside. (e.g. "大 vs. 犬") 33 | '( 34 | ("壬" "𡈼") ; [1] 35 | ("土" "士") ; [1a] [寺]CT, [吉𠮷],[壮-壮] 36 | ("工" "") ; [9] 昂-昻 37 | ("八" "丷") ; [15a] 兌-兑, S1.5.h 38 | ("十" "𠂇") ; [24] 39 | ("几" "𠘧" "𠘨") ; 40 | ("禾" "𥝌") ; 41 | ("刊" "刋") ; [24] 42 | ("凢" "凣") ; [24-1] 43 | ("丁" "𠄐") ; [31a] 44 | ("木" "朩" "𣎳") ; [30a] [栗]CT 45 | ("月" "⺼" "" "" "" "") ; [36] [望]CT, [炙]CT, [然]CT,肉月 46 | ("戌" "戍") ; [34a] [㤜-㤜] 47 | ("盇" "𥁋") ; [35] 48 | ("凡" "卂" "𠁽") ; [38] [巩]CJ, [嬴]CT, [𪎒-𪎒], [汎-汎] 49 | ("牛" "𠂒") ; [42] 吿-告 50 | ("肀" "") ; [43] 唐 51 | ("畀" "𢌿") ; [45] [鼻]CJ 52 | ("夨" "") ; 53 | ("里" "") ; 54 | ("承" "") ; 55 | ;; ("異" "") ; 56 | ("冄" "") ; [53] 57 | ("另" "叧" "𠮠") ; [60] 枴-柺,別-别 58 | ("亏" "亐") ; [62-3] 汚-污, [𡧈-𡧈], [𥁄-𥁄], [𥃳-𥃳], [𧥦-𧥦] 59 | ("山" "屮") ; [62-5] [嶲-嶲], [㒞-㒞] 60 | ("臿" "𢆍") ; [62-7] 挿-插-揷 61 | ("羊" "𦍌") ; [62-10] [善]CT 62 | ("眞" "真") ; [63b] 鷆-鷏,顚-顛,鎭-鎮,槇-槙,愼-慎,巓-巔,塡-填 S1.5 63 | ("県" "") ; [63c] S1.5 64 | ("抛" "拋") ; [65] 65 | ("日" "冃" "曰") ; [68] [冒]TJ 66 | ("己" "巳" "㔾" "已") ; [71a] 67,70,71を含む 67 | ("本" "夲") ; [71-1] 68 | ;; 亼 𠓛 [79] 69 | ("夾" "㚒") ; [79a] [䀹]CJ, [㣣]CJ error unify 70 | ("⺈" "𠂉") ; [31a] 尓-尔,亇[CJ] 71 | ("⺈" "刀" "𠂊") ; [81] 絕-絶, [兔]CK, 
[免-免] 72 | ("内" "內") ; [80] 吶-呐 73 | ("丏" "丐") ; [81-1] 麪-麫 74 | ("厂" "丆") ; [81-2] 䂖 75 | ("一" "丶" "乀") ; 匆-匆 対策 76 | ("夬" "") ; 77 | ("茲" "兹" "玆") ; [83] 孳-孶 [嗞]CJ, [嵫]CK, [慈-慈]CT, [螆]CJ, [滋-滋] 78 | ("卉" "𠦃" "𠦄") ; [84] 79 | ("𦣝" "𦣞") ; [90] 姫-姬*, 煕-熙 80 | ("姫" "姬") ; [91] 81 | ;; 叟 ⿱申又 [92] 82 | ("申" "𦥔") ; [92a] [叟-叟] 83 | ("电" "") ; [92b] 奄-𡘹,[淹-淹], [掩-掩] 84 | ("㬰" "臾") ; [92c] [𣢧-𣢧], [瘐-瘐] 85 | ;; 寛 寬 [93] 86 | ("萈" "莧") ; [93a] 寛-寬 87 | ("冊" "册" "𠕁") ; [94] 删-刪,姍-姗,柵-栅,[珊]CJ (S1.5) 88 | ("廾" "廾") ; [95] 89 | ("开" "幵" "𠦅") ; [96] 妍-姸, 研-硏, 訮-詽, 豜-豣, 邢-郉, 鈃-銒, [汧]CJ 90 | ("并" "幷") ; [97] 併-倂,屏-屛,帡-帲,瓶-甁,胼-腁,軿-輧,迸-逬,餅-餠,駢-騈 (S1.5) 91 | ("毎" "每") ; [98] [侮]CJ, [悔]CJ, [敏]CJ (S1.5) 92 | ("黑" "黒") ; [99] 93 | ("熏" "𤋱") ; [99] 薫-薰 94 | ("東" "柬") ; [100] 諫-諌, 錬-鍊, 鶇-鶫, 䦨-闌,[煉]CJ, [練]CJ 95 | ("曽" "曾") ; [101] 増-增 (S1.5) 96 | ("子" "孑") ; 97 | ("乙" "𠃉") ; 98 | ("㞋" "𠬝") ; [赧]CJ,[報-報] 99 | ("厄" "卮" "𢀴") ; 卮は䝈のみ 100 | ("勳" "勲") ; [爋]CT 101 | ("㠯" "") ; 102 | ("攵" "夂" "夊") ; [106] 夐-敻 [62-2] 103 | ("市" "巿") ; [109] [姉]CT,[昁],[沛],[肺],[閙],[鬧],[㧊],[㸬] 104 | ;; 穉 [110] 105 | ("郷" "鄉") ; [111] 106 | ("匚" "匸") ; [118] 107 | ("先" "兂" "旡") ; [123a,b] 108 | ("大" "犬") ; [124] [器]CJ, 獎-奬, 戻-戾, 达-迖, 涙-淚, [類]CJ, [莽]CK, 臭-𦤀 109 | ("大" "太") ; [124a] 馱-駄 110 | ("免" "兔") ; [127] 嬎-嬔, 晚-晩 111 | ;; 辶 [128] 112 | ("豕" "豖") ; [129] [啄]CJ, [琢]CJ 113 | ("尢" "𡯁") ; [66] 114 | ("尢" "尤") ; [129-4] 㞊[GT] 115 | ("王" "玉") ; [129-6] 囯-国 116 | ("刄" "刅") ; [129-7] 剏-剙 117 | ("刄" "刃") ; [170] S1.5.j 118 | ("叉" "㕚") ; [129-1] 119 | ("丈" "𠀋") ; [129-2] 120 | ("单" "単") ; [129-5] 弹-弾 (S1.5) 121 | ("曳" "曵") ; [129-8] [㡼-㡼]CT 122 | ("徴" "徵") ; [131] 123 | ("𡵉" "𡵂") ; [132a] 徳-德 124 | ("巛" "𡿧") ; [134-3] 菑-葘, 輜-輺, [災-災], [甾-甾, 㿳[GT] 125 | ("寜" "寧") ; [134-1] 126 | ("舃" "舄") ; [134-2] 127 | ("鳯" "鳳") ; [134-4] 128 | ("戋" "㦮" "𢦍") ; [134-5] [残]CJ, [浅]CJ 129 | ("焭" "煢") ; [134-6] 130 | ("㡳" "底") ; [134-7] [菧-菧] 131 | ("奥" "奧") ; [135] 132 | ("粤" "粵") ; [135a] 133 | ("𡭴" "𡭽" "𡮂") ; [137] [隙]CJ 134 | ("篡" "簒") ; 
[138] 135 | ("吕" "呂") ; [138-1] 宫-宮 (S1.5) 136 | ("𤰞" "卑") ; [138-2] [婢-婢] ;; 無限ループ防止 137 | ("虽" "𧈧") ; [139] 強-强 138 | ;; 口 厶 [139] 139 | ("肙" "䏍") ; [139a] [悁-悁], [捐-捐], [睊-睊], [蜎-蜎], [鋗-鋗], [蜎-蜎] 圎-圓 140 | ("兖" "兗") 141 | ("衮" "袞") 142 | ("圖" "圗") ; [139d] 143 | ("員" "貟") ; [139e] 圎-圓, [霣-霣], [𤠔-𤠔] 144 | ("黃" "黄") ; [141] 横-橫 (S1.5) 145 | ("菫" "堇") ; [142] 146 | ("堇" "𦰌") ; [142] 147 | ("𦰩" "") ; [143] 148 | ("隺" "寉") ; [143-2] [鶴-鶴] 149 | ("争" "爭") ; [144] 静-靜,筝-箏,浄-淨,峥-崢,净-凈 (S1.5) 150 | ("為" "爲") ; [144a] 蒍-蔿,溈-潙,媯-嬀,偽-僞 (S1.5) 151 | ("口" "") ; [145] 高-髙 152 | ("靑" "青") ; [146] 淸-清 (S1.5) 153 | ("昷" "𥁕") ; [147] 煴-熅, 媪-媼, 愠-慍, 揾-搵, 榅-榲, 氲-氳, 温-溫, 緼-縕, 腽-膃, 蒀-蒕, 蕰-薀, 藴-蘊, 輼-轀, 醖-醞, 鰛-鰮 (S1.5) 154 | ("同" "") ; [148] 爂釁爨 155 | ("魚" "𩵋") ; [149] IRG24 156 | ("頼" "賴") ; [151] 瀨-瀬 157 | ("疐" "𤴡") ; [152-1] 嚏-嚔 158 | ("眾" "衆") ; [152-2] 159 | ("么" "幺") ; [152-3] 麼-麽 160 | ("壽" "夀") ; [152-4] 161 | ("友" "犮") ; [152-5] 抜-拔, 髪-髮 162 | ("皐" "臯") ; [152-6] 翶-翺, 皡-皥 163 | ("兔" "兎") ; [152-7] 164 | ("稟" "禀") ; [152-9] 䕲 165 | ("釆" "采") ; [152-11] [釉]CT, [彩-彩] 166 | ("爽" "𡙁") ; [152-12] 167 | ("廉" "亷") ; [152-13] [𣀊-𣀊] 168 | ("正" "𤴔") ; [152-10] 頙 169 | ("皀" "" "艮" "𠧢") ; [153] S1.5 廄-廏,廄-廏,既-旣,匓[CJK] 170 | ;;  [154] 171 | ("食" "飠" "𩙿") ; [155] 飮-飲 (S1.5) 172 | ("象" "𧰼") ; [156] 173 | ;; 巤 [157] 174 | ("煕" "𤋮") ; [158] 175 | ("囱" "囪") ; [159] 176 | ;; 鼠 [160] 177 | ("示" "礻") ; [161] S1.5 178 | ("示" "𤓯") ; [祖-祖] 179 | ;; 状 狀 [162] 180 | ("丬" "爿") ; [162a] 妆-妝, 壮-壯, 寝-寢, 将-將, 荘-莊, 蒋-蔣, 状-狀, 装-裝, 醤-醬 181 | ;;  [163] (𠂢) 182 | ("" "𧘇") 183 | ("車" "𨊥") ; [165] 撃-擊 184 | ("业" "") ; [166] 嘘-噓, 戯-戱, 虚-虛, [黹-黹], [普-晉] 185 | ("彐" "彑") ; [167] 彔录, 168 186 | ("殻" "𣪊" "㱿") ; [174] 本来はUCS対象外 187 | ("冒" "冐") ; [176a] 188 | ("畫" "畵") ; [177] 189 | ("俞" "兪") ; [178] 偷-偸,喩-喻,婾-媮,楡-榆 190 | ("専" "專") 191 | ("恵" "惠") 192 | ("晉" "晋") ; [178-2] 戩-戬 (S1.5) 193 | ("缶" "𦈢" "𠙻") ; [178-4] [啣-啣], [啕-啕], [徭-徭], [揺-摇], [滛-滛] 194 | ("羮" "羹") ; [178-5] 195 | ;; 丗 (8C4B) [178-6] 196 | ("帯" "带") ; 
[178-6] 197 | ("走" "赱") ; [178-7] 198 | ("𣴎" "羕") ; [178-8] 199 | ("羡" "羨") ; [178-9] 200 | ("亡" "亾" "兦") ; [178-10] [慌-慌], 㡃/㡆,[惘-惘],巟-㠩,罔-㒺 201 | ("网" "𦉳") ; [178-11] 202 | ("睿" "𥈠") ; [179a] 㲊䜜 203 | ;; [180] 204 | ("手" "龵") ; 掰 205 | ("乑" "") ; [聚] 206 | ("歹" "歺") ; 殩 (𥹏vs粲) 207 | ("嬴" "𡣍") ; [瀛-瀛] 208 | ("厂" "𠂆") ; [𠨬-𠨬] 209 | 210 | ("小" "忄" "𡭔") ; 211 | ("冂" "冖" "") ; 212 | ("冈" "罓") ; 213 | ("卄" "艹") ; 214 | ("卄" "廾") ; 215 | 216 | ("口" "囗") ; 217 | ("王" "𤣩") ; 218 | ("竹" "𥫗") ; 219 | ("牛" "牜") ; 220 | ("糸" "糹") ; 221 | ("言" "訁") ; 222 | ("足" "𧾷") ; 223 | ("䜌" "龻") ; 224 | ("𠦝" "龺") ; 225 | ("金" "釒") ; 226 | ("卜" "⺊") ; 227 | ("㐫" "㓙" "囟") ; 228 | ("卥" "𠧧") ; 229 | 230 | ("贛" "𥫔") ; 231 | ("亇" "个") ; 232 | ("寇" "𡨥") ; 233 | 234 | ("帀" "币") ; 235 | ("夅" "𡕘") ; 236 | ("瓜" "𤓰") ; 237 | ("𣎼" "𡥀") ; 238 | ("𠬛" "") ; 239 | ("婁" "𡝤") ; 240 | ("与" "") ; 241 | ("𦔮" "耴") ; 242 | ("會" "㑹") ; 243 | ("工" "") ; 244 | ("㸚" "𠈌") ; 245 | ("𢏚" "") ; 246 | ("𠃬" "") ; 247 | ("希" "𢁫") ; 248 | ("龠" "𠎤") ; 249 | ("由" "𠙹") ; 250 | ("幺" "乡") 251 | ("円" "丹") 252 | ("厂" "") 253 | ("又" "") 254 | ("甚" "𫞪") 255 | ("今" "𫝆") 256 | ("" "") 257 | ("冉" "") 258 | ("𠂎" "") 259 | ("" "") 260 | ("" "") 261 | ("𠀉" "" "" "") 262 | ("亼" "" "亽" "𠓛") 263 | ("丩" "𠂈") ; 264 | ("厂" "") ; 265 | ("卩" "龴" "") ; [令]CJ, similarity by China 266 | ("止" "龰") ; 267 | ("㐅" "乂") ; 268 | ("" "𦣻") ; [戛-戛] 269 | ("永" "𣱵" "𣱳") ; [178-8] [羕-羕],D13056 270 | ("才" "") ; 271 | ;; ("口" "𠔼") ; 𢘖 272 | ("彐" "⺕" "") ; 273 | ("甾" "𠚋") ; 274 | ("𦬇" "𦬠") ; 275 | ("𦍋" "芈" "羋") ; [哶-哶], [䖹] 276 | ("二" "𠄠" "𠄟" "𠄞") 277 | ("" "⺂") 278 | ("日" "臼" "𦥑") ; [71-3] 捏-揑, 陧-隉, [叟]CJ, [臾]simsun 279 | 280 | ("丨" "丿") ; 亅U+4E85 281 | ("龶" "主") ; [112-1] 282 | ("土" "𠀆") ; [44a] 283 | ("" "") 284 | ("𠃍" "乛") ;  285 | ("𠃍" "𠃌") 286 | ("兀" "丌") ; [123-1] [嬈-嬈], 287 | ("乚" "𠃊") ;乙 288 | ("廿" "龷") ; [143-1] 襔,㒼-䓣 289 | ("冫" "⺀") ; [25] [冬]CT 290 | ("九" "丸" "") ; [129-3] 骩-骫 no other examples 291 | ("且" "旦") ; [64] 查-査, 
[蔖-蔖] 292 | ("毋" "母") 293 | ("人" "入" "𠆢") ; [79] [全]TJ 294 | ("𫶬" "") 295 | ("" "甫") ; 296 | ("𡿨" "") 297 | ("耴" "𦔮") 298 | )) 299 | 300 | ;; Equivalences 301 | 302 | (defvar ids-equivalents) 303 | (setq 304 | ids-equivalents 305 | ;; Characters are equivalent when: 306 | ;; These characters can be decomposed differently (e.g. 王 vs. 壬) 307 | ;; e.g. 王→⿱一土, 壬→⿱丿士 308 | '( 309 | ("王" "壬") ; [1] 310 | ("干" "千") ; [1b] 311 | ("丰" "丯" "龶") ; [4] [契]CJ, [憲]CJ, [害]CJ 312 | ("戸" "戶" "户") ; [5] S3 313 | ("天" "夭") ; [6] 吞-呑, [忝] 314 | ("⺶" "⿱𢆉丆") 315 | 316 | ("孝" "𡥉" "𡥈") ; [8a] 317 | ("参" "叁") ; [10-1] 318 | ("參" "叄") ; [10-1] 319 | 320 | ("氺" "" "⿲𠄠丨𠄠") ; [17] 321 | ("水" "氺") ; [17a] [眔]CJ, [犀-犀] 322 | 323 | ("羽" "⿰彐彐") ; [19a] 324 | ("卯" "⿰𠂎刀") ; [23] 325 | ("殳" "⿱⺈又") ; [25-1] 沒-没, 歿-殁 326 | 327 | ("西" "覀") ; [27a] 328 | ("覀" "襾") ; [88] [覃],[覂],[䌁],要/S1.5.d 329 | 330 | ("四" "罒") ; [28a] XMLファイルには未掲載 331 | ("𠱠" "罒") ; 332 | 333 | ("月" "⿵⺆⺀") ; [36] 舟月 334 | ("凡" "⿵几一") ; [37] 335 | 336 | ("甫" "⿺𤰔丶") ; [40] 337 | ("甬" "⿱龴田") ; [41] 338 | ("吳" "呉" "吴") ; [48] [茣]CT, [虞]CT, [誤]CT (S1.5) 339 | ("吳" "⿺夨口") ; [48] [茣]CT, [虞]CT, [誤]CT (S1.5) 340 | 341 | ("毋" "毌") ; [98a] 毎-每, [侮]CJ, [瑇-瑇], [𦔣-𦔣], [貫-貫] 342 | ("田" "毌") ; [58] 343 | 344 | ("爰" "⿳爫土夂") ; [61a] 㬊[J] 345 | ("圼" "⿱臼土" "⿱臼工") ; [62] 捏-揑, 陧-隉, 毀-毁 346 | 347 | ("圣" "𢀖") ; [62-9] [茎] 348 | ("聿" "") ; [62-11] [衋]CJ 349 | 350 | ("九" "尢") ; [65a] 351 | ("兀" "尢") ; [62-8] 尪-尫, 抛-拋, 尶-尷 352 | 353 | ("儿" "几") ; [69] 354 | ("八" "儿") ; [26a] 355 | 356 | ("义" "叉") ; [71-2] [䁊]CT, [㳗]CK, [蚤]CT, [芆]CT 357 | 358 | ("产" "⿱文厂") ; [72] 産-產, 彥-彦, 顏-顔 359 | 360 | ("冈" "𦉪" "𠔿") ; [73] 嫓 361 | ("𠔿" "") ; 奐 𦉪 for BMP, 𠔿 for extB, [像-像] 362 | 363 | ("匕" "𠤎" "七") ; [78] [叱-叱]CJ 364 | 365 | ("犀" "⿸尸⿱氺㐄") ; 遲-遲 366 | 367 | ("卄" "卝") ; [82] 368 | 369 | ("𠫓" "⿱亠厶") ; [105] [育]CT, [㐬] 370 | ("巩" "⿰工几") ; [129-9] [築-築] 371 | ("𢛳" "⿳十罒心") ; [130] 徳-德 372 | ("㚅" "⿱夂生") ; [133a] 373 | ("" "⿱士冖") ; [134a] 壳 374 | 375 | ("小" "𣥂") ; [10a] 376 | 
("少" "𣥂") ; [136] 步-歩,涉-渉,歲-歳 (S1.5) 377 | 378 | ("堇" "⿱廿⿻口土") ; [142b] 379 | ("曷" "⿱日匂") ; [150] 渇-渴, 掲-揭 (S1.5) 380 | ("韋" "⿳口帀") ; [152-8] 衛-衞 381 | ("庶" "庻" "⿸广⿱龷灬" "⿸广⿱龷从") ; [164] 382 | 383 | ("小" "⺌") ; [16] 尙-尚 384 | ("巛" "⺌" "⺍") ; [171] 䎩 385 | 386 | ("馬" "⿹廾") ; [173] 387 | ("疌" "⿳⺊⺕龰") ; [178-3] 婕-媫, [蜨-蜨] 388 | 389 | ("㕣" "⿱几口") ; [189] 兖-兗, 滚-滾, 衮-袞, [沿-沿]CT 390 | ("寽" "⿳爫丿寸") ; [192] 埒-埓 391 | ("蒙" "⿱冡") ; [懞-懞] 392 | ("匈" "⿹勹⿺𠃊㐅") ; 㕼[GJ], 𥑪-𥒚 詾-𧦷は矛盾 393 | ("切" "⿰土刀") ; [切-切] 394 | 395 | ("丰" "𠦂") ; [翺-翺] 396 | ("𠦂" "⿱⿲𠄠丨𠄠十") ; [110a] 噑-噑 397 | ("直" "⿱匕⿺𠃊目") ; [埴-埴] 398 | ("果" "⿻𦥑木") ; [巢-巢] 399 | 400 | ("荆" "荊") ; [181] 401 | ("臥" "卧") ; [187b] 402 | ("擧" "𦦙") ; [188] 403 | ("舍" "舎") ; [194] 404 | 405 | ("了" "𠄎") ; 406 | ("乙" "乚") ; 407 | ("囧" "⿴囗㕣") ; [㴄]CT 408 | ("大" "⿱一八") ; 𠨩 409 | ("夾" "⿱𠆢") ; 𣷚 410 | ("荒" "⿱艹㐬") ; 慌 411 | 412 | ("二" "冫") ; [33] 勻-匀, [次]CJ 413 | ("冫" "") ; 弱,𢏒𢏻𢏽𦸹 414 | 415 | ("匁" "⿹勹㐅") ; 416 | ("凢" "⿱一几") ; D35222.0 417 | 418 | ("吅" "⿴口丨") ; 419 | ("會" "⿳亼田日") ; 420 | ("" "𠂡" "⿵𠘨氺") ; 421 | ("𦲸" "⿱廿𠕒" "⿱廿雨") ; 422 | ("𣎵" "⿻屮八") ; 423 | ("爫" "爪") ; new entry. 424 | ("𠀍" "丗") 425 | )) 426 | 427 | (provide 'ids-equiv) 428 | ;;; ids-equiv.el ends here 429 | --------------------------------------------------------------------------------