├── .ipynb_checkpoints
    └── AutoChecker4Chinese-checkpoint.ipynb
├── AutoChecker4Chinese.ipynb
├── AutoChecker4Chinese.pdf
├── Autochecker4Chinese.py
├── cn_dict.txt
├── readme.md
├── readme.pdf
├── result.png
├── token_freq_pos%40350k_jieba.txt
├── token_pinyin%4040k_sogou.txt
└── words.dic


/.ipynb_checkpoints/AutoChecker4Chinese-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": true,
  7 |     "editable": true
  8 |    },
  9 |    "source": [
 10 |     "## Solutions of autochecker for chinese"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {
 16 |     "deletable": true,
 17 |     "editable": true
 18 |    },
 19 |    "source": [
 20 |     "### 1. Construct a detector"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "metadata": {
 27 |     "collapsed": true,
 28 |     "deletable": true,
 29 |     "editable": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# Step1 : construct a dict to detect the misspelled chinese phrase\n",
 34 |     "# key is the chinese word, value is its corresponding frequency appeared in corpus\n",
 35 |     "# you can finish this step by collecting corpus from the internet\n",
 36 |     "# or you can choose a more easy way, load some dicts already created by others"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": true,
 44 |     "deletable": true,
 45 |     "editable": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "def construct_dict( file_path ):\n",
 50 |     "    \n",
 51 |     "    word_freq = {}\n",
 52 |     "    with open(file_path, \"r\") as f:\n",
 53 |     "        for line in f:\n",
 54 |     "            info = line.split()\n",
 55 |     "            word = info[0]\n",
 56 |     "            frequency = info[1]\n",
 57 |     "            word_freq[word] = frequency\n",
 58 |     "    \n",
 59 |     "    return word_freq"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 3,
 65 |    "metadata": {
 66 |     "collapsed": true,
 67 |     "deletable": true,
 68 |     "editable": true
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "FILE_PATH = \"./token_freq_pos%40350k_jieba.txt\"\n",
 73 |     "\n",
 74 |     "phrase_freq = construct_dict( FILE_PATH )"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {
 81 |     "collapsed": false,
 82 |     "deletable": true,
 83 |     "editable": true
 84 |    },
 85 |    "outputs": [
 86 |     {
 87 |      "name": "stdout",
 88 |      "output_type": "stream",
 89 |      "text": [
 90 |       "<type 'dict'>\n",
 91 |       "349045\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "print( type(phrase_freq) )\n",
 97 |     "print( len(phrase_freq) )"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {
103 |     "deletable": true,
104 |     "editable": true
105 |    },
106 |    "source": [
107 |     "### 2. Construct an autocorrector"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 5,
113 |    "metadata": {
114 |     "collapsed": true,
115 |     "deletable": true,
116 |     "editable": true
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "import pinyin"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 11,
126 |    "metadata": {
127 |     "collapsed": false,
128 |     "deletable": true,
129 |     "editable": true
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "# list for chinese words\n",
134 |     "# read from the words.dic\n",
135 |     "def load_cn_words_dict( file_path ):\n",
136 |     "    cn_words_dict = \"\"\n",
137 |     "    with open(file_path, \"r\") as f:\n",
138 |     "        for word in f:\n",
139 |     "            cn_words_dict += word.strip().decode(\"utf-8\")\n",
140 |     "    return cn_words_dict"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 12,
146 |    "metadata": {
147 |     "collapsed": true,
148 |     "deletable": true,
149 |     "editable": true
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "# generate every phrase that is one edit away from the given Chinese phrase\n",
154 |     "def edits1(phrase, cn_words_dict):\n",
155 |     "    \"All edits that are one edit away from `phrase`.\"\n",
156 |     "    phrase = phrase.decode(\"utf-8\")\n",
157 |     "    splits     = [(phrase[:i], phrase[i:])  for i in range(len(phrase) + 1)]\n",
158 |     "    deletes    = [L + R[1:]                 for L, R in splits if R]\n",
159 |     "    transposes = [L + R[1] + R[0] + R[2:]   for L, R in splits if len(R)>1]\n",
160 |     "    replaces   = [L + c + R[1:]             for L, R in splits if R for c in cn_words_dict]\n",
161 |     "    inserts    = [L + c + R                 for L, R in splits for c in cn_words_dict]\n",
162 |     "    return set(deletes + transposes + replaces + inserts)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 13,
168 |    "metadata": {
169 |     "collapsed": true,
170 |     "deletable": true,
171 |     "editable": true
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "# return the phrases that exist in phrase_freq\n",
176 |     "def known(phrases): return set(phrase for phrase in phrases if phrase.encode(\"utf-8\") in phrase_freq)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 14,
182 |    "metadata": {
183 |     "collapsed": true,
184 |     "deletable": true,
185 |     "editable": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "# get the candidates phrase of the error phrase\n",
190 |     "# we sort the candidates phrase's importance according to their pinyin\n",
191 |     "# if the candidate phrase's pinyin exactly matches with the error phrase, we put them into first order\n",
192 |     "# if the candidate phrase's first word pinyin matches with the error phrase first word, we put them into second order\n",
193 |     "# else we put candidate phrase into the third order\n",
194 |     "def get_candidates( error_phrase ):\n",
195 |     "    \n",
196 |     "    candidates_1st_order = []\n",
197 |     "    candidates_2nd_order = []\n",
198 |     "    candidates_3nd_order = []\n",
199 |     "    \n",
200 |     "    error_pinyin = pinyin.get(error_phrase, format=\"strip\", delimiter=\"/\").encode(\"utf-8\")\n",
201 |     "    cn_words_dict = load_cn_words_dict( \"./cn_dict.txt\" )\n",
202 |     "    candidate_phrases = list( known(edits1(error_phrase, cn_words_dict)) )\n",
203 |     "    \n",
204 |     "    for candidate_phrase in candidate_phrases:\n",
205 |     "        candidate_pinyin = pinyin.get(candidate_phrase, format=\"strip\", delimiter=\"/\").encode(\"utf-8\")\n",
206 |     "        if candidate_pinyin == error_pinyin:\n",
207 |     "            candidates_1st_order.append(candidate_phrase)\n",
208 |     "        elif candidate_pinyin.split(\"/\")[0] == error_pinyin.split(\"/\")[0]:\n",
209 |     "            candidates_2nd_order.append(candidate_phrase)\n",
210 |     "        else:\n",
211 |     "            candidates_3nd_order.append(candidate_phrase)\n",
212 |     "    \n",
213 |     "    return candidates_1st_order, candidates_2nd_order, candidates_3nd_order"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 15,
219 |    "metadata": {
220 |     "collapsed": false,
221 |     "deletable": true,
222 |     "editable": true
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "def auto_correct( error_phrase ):\n",
227 |     "    \n",
228 |     "    c1_order, c2_order, c3_order = get_candidates(error_phrase)\n",
229 |     "    # print c1_order, c2_order, c3_order\n",
230 |     "    if c1_order:\n",
231 |     "        return max(c1_order, key=phrase_freq.get )\n",
232 |     "    elif c2_order:\n",
233 |     "        return max(c2_order, key=phrase_freq.get )\n",
234 |     "    else:\n",
235 |     "        return max(c3_order, key=phrase_freq.get )"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 16,
241 |    "metadata": {
242 |     "collapsed": false,
243 |     "deletable": true,
244 |     "editable": true
245 |    },
246 |    "outputs": [
247 |     {
248 |      "name": "stdout",
249 |      "output_type": "stream",
250 |      "text": [
251 |       "呕涂 呕吐\n",
252 |       "东方之朱 东方之珠\n",
253 |       "沙拢 沙龙\n"
254 |      ]
255 |     }
256 |    ],
257 |    "source": [
258 |     "# test for the auto_correct \n",
259 |     "error_phrase_1 = \"呕涂\" # should be \"呕吐\"\n",
260 |     "error_phrase_2 = \"东方之朱\" # should be \"东方之珠\"\n",
261 |     "error_phrase_3 = \"沙拢\" # should be \"沙龙\"\n",
262 |     "\n",
263 |     "print error_phrase_1, auto_correct( error_phrase_1 )\n",
264 |     "print error_phrase_2, auto_correct( error_phrase_2 )\n",
265 |     "print error_phrase_3, auto_correct( error_phrase_3 )"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {
271 |     "deletable": true,
272 |     "editable": true
273 |    },
274 |    "source": [
275 |     "### 3. Correct the misspelled phrases in a sentence"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 17,
281 |    "metadata": {
282 |     "collapsed": true,
283 |     "deletable": true,
284 |     "editable": true
285 |    },
286 |    "outputs": [],
287 |    "source": [
288 |     "# step 3 : Tokenization\n",
289 |     "# For any given sentence, use jieba do the segmentation\n",
290 |     "# Get segment list after segmentation is done\n",
291 |     "# check if the remain phrase exists in word_freq dict\n",
292 |     "# if not, then it is a misspelled phrase\n",
293 |     "# use auto_correct fun to correct the phrase"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 18,
299 |    "metadata": {
300 |     "collapsed": false,
301 |     "deletable": true,
302 |     "editable": true
303 |    },
304 |    "outputs": [],
305 |    "source": [
306 |     "import jieba\n",
307 |     "import string\n",
308 |     "import re"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "code",
313 |    "execution_count": 19,
314 |    "metadata": {
315 |     "collapsed": true,
316 |     "deletable": true,
317 |     "editable": true
318 |    },
319 |    "outputs": [],
320 |    "source": [
321 |     "PUNCTUATION_LIST = string.punctuation\n",
322 |     "PUNCTUATION_LIST += \"。，？：；｛｝［］‘“”《》／！％……（）\""
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 21,
328 |    "metadata": {
329 |     "collapsed": true,
330 |     "deletable": true,
331 |     "editable": true
332 |    },
333 |    "outputs": [],
334 |    "source": [
335 |     "def auto_correct_sentence( error_sentence, verbose=True):\n",
336 |     "    \n",
337 |     "    jieba_cut = jieba.cut(error_sentence.decode(\"utf-8\"), cut_all=False)\n",
338 |     "    seg_list = \"\\t\".join(jieba_cut).split(\"\\t\")\n",
339 |     "    \n",
340 |     "    correct_sentence = \"\"\n",
341 |     "    \n",
342 |     "    for phrase in seg_list:\n",
343 |     "        \n",
344 |     "        correct_phrase = phrase\n",
345 |     "        # check if item is a punctuation\n",
346 |     "        if phrase not in PUNCTUATION_LIST.decode(\"utf-8\"):\n",
347 |     "            # check if the phrase in our dict, if not then it is a misspelled phrase\n",
348 |     "            if phrase.encode(\"utf-8\") not in phrase_freq.keys():\n",
349 |     "                correct_phrase = auto_correct(phrase.encode(\"utf-8\"))\n",
350 |     "                if verbose :\n",
351 |     "                    print phrase, correct_phrase\n",
352 |     "    \n",
353 |     "        correct_sentence += correct_phrase\n",
354 |     "    \n",
355 |     "    if verbose:\n",
356 |     "        print correct_sentence\n",
357 |     "    return correct_sentence"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 23,
363 |    "metadata": {
364 |     "collapsed": false,
365 |     "deletable": true,
366 |     "editable": true
367 |    },
368 |    "outputs": [
369 |     {
370 |      "name": "stdout",
371 |      "output_type": "stream",
372 |      "text": [
373 |       "机七 机器\n",
374 |       "领遇 领域\n",
375 |       "分知 分枝\n",
376 |       "机器学习是人工智能领域最能体现智能的一个分枝！\n"
377 |      ]
378 |     }
379 |    ],
380 |    "source": [
381 |     "err_sent = '机七学习是人工智能领遇最能体现智能的一个分知！'\n",
382 |     "correct_sent = auto_correct_sentence( err_sent )"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": 24,
388 |    "metadata": {
389 |     "collapsed": false,
390 |     "deletable": true,
391 |     "editable": true
392 |    },
393 |    "outputs": [
394 |     {
395 |      "name": "stdout",
396 |      "output_type": "stream",
397 |      "text": [
398 |       "机器学习是人工智能领域最能体现智能的一个分枝！\n"
399 |      ]
400 |     }
401 |    ],
402 |    "source": [
403 |     "print correct_sent"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": null,
409 |    "metadata": {
410 |     "collapsed": true,
411 |     "deletable": true,
412 |     "editable": true
413 |    },
414 |    "outputs": [],
415 |    "source": []
416 |   }
417 |  ],
418 |  "metadata": {
419 |   "kernelspec": {
420 |    "display_name": "nlp_interview",
421 |    "language": "python",
422 |    "name": "nlp_interview"
423 |   },
424 |   "language_info": {
425 |    "codemirror_mode": {
426 |     "name": "ipython",
427 |     "version": 2
428 |    },
429 |    "file_extension": ".py",
430 |    "mimetype": "text/x-python",
431 |    "name": "python",
432 |    "nbconvert_exporter": "python",
433 |    "pygments_lexer": "ipython2",
434 |    "version": "2.7.13"
435 |   }
436 |  },
437 |  "nbformat": 4,
438 |  "nbformat_minor": 2
439 | }
440 | 


--------------------------------------------------------------------------------
/AutoChecker4Chinese.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": true,
  7 |     "editable": true
  8 |    },
  9 |    "source": [
 10 |     "## Solutions of autochecker for chinese"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {
 16 |     "deletable": true,
 17 |     "editable": true
 18 |    },
 19 |    "source": [
 20 |     "### 1. Construct a detector"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "metadata": {
 27 |     "collapsed": true,
 28 |     "deletable": true,
 29 |     "editable": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# Step1 : construct a dict to detect the misspelled chinese phrase\n",
 34 |     "# key is the chinese word, value is its corresponding frequency appeared in corpus\n",
 35 |     "# you can finish this step by collecting corpus from the internet\n",
 36 |     "# or you can choose a more easy way, load some dicts already created by others"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": true,
 44 |     "deletable": true,
 45 |     "editable": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "def construct_dict( file_path ):\n",
 50 |     "    \n",
 51 |     "    word_freq = {}\n",
 52 |     "    with open(file_path, \"r\") as f:\n",
 53 |     "        for line in f:\n",
 54 |     "            info = line.split()\n",
 55 |     "            word = info[0]\n",
 56 |     "            frequency = info[1]\n",
 57 |     "            word_freq[word] = frequency\n",
 58 |     "    \n",
 59 |     "    return word_freq"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 3,
 65 |    "metadata": {
 66 |     "collapsed": true,
 67 |     "deletable": true,
 68 |     "editable": true
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "FILE_PATH = \"./token_freq_pos%40350k_jieba.txt\"\n",
 73 |     "\n",
 74 |     "phrase_freq = construct_dict( FILE_PATH )"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {
 81 |     "collapsed": false,
 82 |     "deletable": true,
 83 |     "editable": true
 84 |    },
 85 |    "outputs": [
 86 |     {
 87 |      "name": "stdout",
 88 |      "output_type": "stream",
 89 |      "text": [
 90 |       "<type 'dict'>\n",
 91 |       "349045\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "print( type(phrase_freq) )\n",
 97 |     "print( len(phrase_freq) )"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {
103 |     "deletable": true,
104 |     "editable": true
105 |    },
106 |    "source": [
107 |     "### 2. Construct an autocorrector"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 5,
113 |    "metadata": {
114 |     "collapsed": true,
115 |     "deletable": true,
116 |     "editable": true
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "import pinyin"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 11,
126 |    "metadata": {
127 |     "collapsed": false,
128 |     "deletable": true,
129 |     "editable": true
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "# list for chinese words\n",
134 |     "# read from the words.dic\n",
135 |     "def load_cn_words_dict( file_path ):\n",
136 |     "    cn_words_dict = \"\"\n",
137 |     "    with open(file_path, \"r\") as f:\n",
138 |     "        for word in f:\n",
139 |     "            cn_words_dict += word.strip().decode(\"utf-8\")\n",
140 |     "    return cn_words_dict"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 12,
146 |    "metadata": {
147 |     "collapsed": true,
148 |     "deletable": true,
149 |     "editable": true
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "# generate every phrase that is one edit away from the given Chinese phrase\n",
154 |     "def edits1(phrase, cn_words_dict):\n",
155 |     "    \"All edits that are one edit away from `phrase`.\"\n",
156 |     "    phrase = phrase.decode(\"utf-8\")\n",
157 |     "    splits     = [(phrase[:i], phrase[i:])  for i in range(len(phrase) + 1)]\n",
158 |     "    deletes    = [L + R[1:]                 for L, R in splits if R]\n",
159 |     "    transposes = [L + R[1] + R[0] + R[2:]   for L, R in splits if len(R)>1]\n",
160 |     "    replaces   = [L + c + R[1:]             for L, R in splits if R for c in cn_words_dict]\n",
161 |     "    inserts    = [L + c + R                 for L, R in splits for c in cn_words_dict]\n",
162 |     "    return set(deletes + transposes + replaces + inserts)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 13,
168 |    "metadata": {
169 |     "collapsed": true,
170 |     "deletable": true,
171 |     "editable": true
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "# return the phrases that exist in phrase_freq\n",
176 |     "def known(phrases): return set(phrase for phrase in phrases if phrase.encode(\"utf-8\") in phrase_freq)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 14,
182 |    "metadata": {
183 |     "collapsed": true,
184 |     "deletable": true,
185 |     "editable": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "# get the candidates phrase of the error phrase\n",
190 |     "# we sort the candidates phrase's importance according to their pinyin\n",
191 |     "# if the candidate phrase's pinyin exactly matches with the error phrase, we put them into first order\n",
192 |     "# if the candidate phrase's first word pinyin matches with the error phrase first word, we put them into second order\n",
193 |     "# else we put candidate phrase into the third order\n",
194 |     "def get_candidates( error_phrase ):\n",
195 |     "    \n",
196 |     "    candidates_1st_order = []\n",
197 |     "    candidates_2nd_order = []\n",
198 |     "    candidates_3nd_order = []\n",
199 |     "    \n",
200 |     "    error_pinyin = pinyin.get(error_phrase, format=\"strip\", delimiter=\"/\").encode(\"utf-8\")\n",
201 |     "    cn_words_dict = load_cn_words_dict( \"./cn_dict.txt\" )\n",
202 |     "    candidate_phrases = list( known(edits1(error_phrase, cn_words_dict)) )\n",
203 |     "    \n",
204 |     "    for candidate_phrase in candidate_phrases:\n",
205 |     "        candidate_pinyin = pinyin.get(candidate_phrase, format=\"strip\", delimiter=\"/\").encode(\"utf-8\")\n",
206 |     "        if candidate_pinyin == error_pinyin:\n",
207 |     "            candidates_1st_order.append(candidate_phrase)\n",
208 |     "        elif candidate_pinyin.split(\"/\")[0] == error_pinyin.split(\"/\")[0]:\n",
209 |     "            candidates_2nd_order.append(candidate_phrase)\n",
210 |     "        else:\n",
211 |     "            candidates_3nd_order.append(candidate_phrase)\n",
212 |     "    \n",
213 |     "    return candidates_1st_order, candidates_2nd_order, candidates_3nd_order"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 15,
219 |    "metadata": {
220 |     "collapsed": false,
221 |     "deletable": true,
222 |     "editable": true
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "def auto_correct( error_phrase ):\n",
227 |     "    \n",
228 |     "    c1_order, c2_order, c3_order = get_candidates(error_phrase)\n",
229 |     "    # print c1_order, c2_order, c3_order\n",
230 |     "    if c1_order:\n",
231 |     "        return max(c1_order, key=phrase_freq.get )\n",
232 |     "    elif c2_order:\n",
233 |     "        return max(c2_order, key=phrase_freq.get )\n",
234 |     "    else:\n",
235 |     "        return max(c3_order, key=phrase_freq.get )"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 16,
241 |    "metadata": {
242 |     "collapsed": false,
243 |     "deletable": true,
244 |     "editable": true
245 |    },
246 |    "outputs": [
247 |     {
248 |      "name": "stdout",
249 |      "output_type": "stream",
250 |      "text": [
251 |       "呕涂 呕吐\n",
252 |       "东方之朱 东方之珠\n",
253 |       "沙拢 沙龙\n"
254 |      ]
255 |     }
256 |    ],
257 |    "source": [
258 |     "# test for the auto_correct \n",
259 |     "error_phrase_1 = \"呕涂\" # should be \"呕吐\"\n",
260 |     "error_phrase_2 = \"东方之朱\" # should be \"东方之珠\"\n",
261 |     "error_phrase_3 = \"沙拢\" # should be \"沙龙\"\n",
262 |     "\n",
263 |     "print error_phrase_1, auto_correct( error_phrase_1 )\n",
264 |     "print error_phrase_2, auto_correct( error_phrase_2 )\n",
265 |     "print error_phrase_3, auto_correct( error_phrase_3 )"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {
271 |     "deletable": true,
272 |     "editable": true
273 |    },
274 |    "source": [
275 |     "### 3. Correct the misspelled phrases in a sentence"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 17,
281 |    "metadata": {
282 |     "collapsed": true,
283 |     "deletable": true,
284 |     "editable": true
285 |    },
286 |    "outputs": [],
287 |    "source": [
288 |     "# step 3 : Tokenization\n",
289 |     "# For any given sentence, use jieba do the segmentation\n",
290 |     "# Get segment list after segmentation is done\n",
291 |     "# check if the remain phrase exists in word_freq dict\n",
292 |     "# if not, then it is a misspelled phrase\n",
293 |     "# use auto_correct fun to correct the phrase"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 18,
299 |    "metadata": {
300 |     "collapsed": false,
301 |     "deletable": true,
302 |     "editable": true
303 |    },
304 |    "outputs": [],
305 |    "source": [
306 |     "import jieba\n",
307 |     "import string\n",
308 |     "import re"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "code",
313 |    "execution_count": 19,
314 |    "metadata": {
315 |     "collapsed": true,
316 |     "deletable": true,
317 |     "editable": true
318 |    },
319 |    "outputs": [],
320 |    "source": [
321 |     "PUNCTUATION_LIST = string.punctuation\n",
322 |     "PUNCTUATION_LIST += \"。，？：；｛｝［］‘“”《》／！％……（）\""
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 21,
328 |    "metadata": {
329 |     "collapsed": true,
330 |     "deletable": true,
331 |     "editable": true
332 |    },
333 |    "outputs": [],
334 |    "source": [
335 |     "def auto_correct_sentence( error_sentence, verbose=True):\n",
336 |     "    \n",
337 |     "    jieba_cut = jieba.cut(error_sentence.decode(\"utf-8\"), cut_all=False)\n",
338 |     "    seg_list = \"\\t\".join(jieba_cut).split(\"\\t\")\n",
339 |     "    \n",
340 |     "    correct_sentence = \"\"\n",
341 |     "    \n",
342 |     "    for phrase in seg_list:\n",
343 |     "        \n",
344 |     "        correct_phrase = phrase\n",
345 |     "        # check if item is a punctuation\n",
346 |     "        if phrase not in PUNCTUATION_LIST.decode(\"utf-8\"):\n",
347 |     "            # check if the phrase in our dict, if not then it is a misspelled phrase\n",
348 |     "            if phrase.encode(\"utf-8\") not in phrase_freq.keys():\n",
349 |     "                correct_phrase = auto_correct(phrase.encode(\"utf-8\"))\n",
350 |     "                if verbose :\n",
351 |     "                    print phrase, correct_phrase\n",
352 |     "    \n",
353 |     "        correct_sentence += correct_phrase\n",
354 |     "    \n",
355 |     "    if verbose:\n",
356 |     "        print correct_sentence\n",
357 |     "    return correct_sentence"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 23,
363 |    "metadata": {
364 |     "collapsed": false,
365 |     "deletable": true,
366 |     "editable": true
367 |    },
368 |    "outputs": [
369 |     {
370 |      "name": "stdout",
371 |      "output_type": "stream",
372 |      "text": [
373 |       "机七 机器\n",
374 |       "领遇 领域\n",
375 |       "分知 分枝\n",
376 |       "机器学习是人工智能领域最能体现智能的一个分枝！\n"
377 |      ]
378 |     }
379 |    ],
380 |    "source": [
381 |     "err_sent = '机七学习是人工智能领遇最能体现智能的一个分知！'\n",
382 |     "correct_sent = auto_correct_sentence( err_sent )"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": 24,
388 |    "metadata": {
389 |     "collapsed": false,
390 |     "deletable": true,
391 |     "editable": true
392 |    },
393 |    "outputs": [
394 |     {
395 |      "name": "stdout",
396 |      "output_type": "stream",
397 |      "text": [
398 |       "机器学习是人工智能领域最能体现智能的一个分枝！\n"
399 |      ]
400 |     }
401 |    ],
402 |    "source": [
403 |     "print correct_sent"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": null,
409 |    "metadata": {
410 |     "collapsed": true,
411 |     "deletable": true,
412 |     "editable": true
413 |    },
414 |    "outputs": [],
415 |    "source": []
416 |   }
417 |  ],
418 |  "metadata": {
419 |   "kernelspec": {
420 |    "display_name": "nlp_interview",
421 |    "language": "python",
422 |    "name": "nlp_interview"
423 |   },
424 |   "language_info": {
425 |    "codemirror_mode": {
426 |     "name": "ipython",
427 |     "version": 2
428 |    },
429 |    "file_extension": ".py",
430 |    "mimetype": "text/x-python",
431 |    "name": "python",
432 |    "nbconvert_exporter": "python",
433 |    "pygments_lexer": "ipython2",
434 |    "version": "2.7.13"
435 |   }
436 |  },
437 |  "nbformat": 4,
438 |  "nbformat_minor": 2
439 | }
440 | 


--------------------------------------------------------------------------------
/AutoChecker4Chinese.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beyondacm/Autochecker4Chinese/ca27a2aed69b79cdc639fc088b0a2f942c2a81f5/AutoChecker4Chinese.pdf


--------------------------------------------------------------------------------
/Autochecker4Chinese.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/python
  2 | # -*- coding:utf-8 -*-
  3 | __author__ = "zpgao"
  4 | 
  5 | import sys
  6 | import pinyin
  7 | import jieba
  8 | import string
  9 | import re
 10 | 
# Path of the word-frequency table that is loaded into `phrase_freq` below.
FILE_PATH = "./token_freq_pos%40350k_jieba.txt"
# Characters that auto_correct_sentence() leaves untouched: ASCII punctuation
# from the `string` module followed by full-width / CJK punctuation marks.
PUNCTUATION_LIST = string.punctuation
PUNCTUATION_LIST += "。，？：；｛｝［］‘“”《》／！％……（）"
 14 | 
 15 | 
 16 | def construct_dict( file_path ):
 17 | 	
 18 | 	word_freq = {}
 19 | 	with open(file_path, "r") as f:
 20 | 		for line in f:
 21 | 			info = line.split()
 22 | 			word = info[0]
 23 | 			frequency = info[1]
 24 | 			word_freq[word] = frequency
 25 | 	
 26 | 	return word_freq
 27 | 
 28 | 
 29 | def load_cn_words_dict( file_path ):
 30 | 	cn_words_dict = ""
 31 | 	with open(file_path, "r") as f:
 32 | 		for word in f:
 33 | 			cn_words_dict += word.strip().decode("utf-8")
 34 | 	return cn_words_dict
 35 | 
 36 | 
 37 | def edits1(phrase, cn_words_dict):
 38 | 	"All edits that are one edit away from `phrase`."
 39 | 	phrase = phrase.decode("utf-8")
 40 | 	splits     = [(phrase[:i], phrase[i:])  for i in range(len(phrase) + 1)]
 41 | 	deletes    = [L + R[1:]                 for L, R in splits if R]
 42 | 	transposes = [L + R[1] + R[0] + R[2:]   for L, R in splits if len(R)>1]
 43 | 	replaces   = [L + c + R[1:]             for L, R in splits if R for c in cn_words_dict]
 44 | 	inserts    = [L + c + R                 for L, R in splits for c in cn_words_dict]
 45 | 	return set(deletes + transposes + replaces + inserts)
 46 | 
 47 | def known(phrases): return set(phrase for phrase in phrases if phrase.encode("utf-8") in phrase_freq)
 48 | 
 49 | 
 50 | def get_candidates( error_phrase ):
 51 | 	
 52 | 	candidates_1st_order = []
 53 | 	candidates_2nd_order = []
 54 | 	candidates_3nd_order = []
 55 | 	
 56 | 	error_pinyin = pinyin.get(error_phrase, format="strip", delimiter="/").encode("utf-8")
 57 | 	cn_words_dict = load_cn_words_dict( "./cn_dict.txt" )
 58 | 	candidate_phrases = list( known(edits1(error_phrase, cn_words_dict)) )
 59 | 	
 60 | 	for candidate_phrase in candidate_phrases:
 61 | 		candidate_pinyin = pinyin.get(candidate_phrase, format="strip", delimiter="/").encode("utf-8")
 62 | 		if candidate_pinyin == error_pinyin:
 63 | 			candidates_1st_order.append(candidate_phrase)
 64 | 		elif candidate_pinyin.split("/")[0] == error_pinyin.split("/")[0]:
 65 | 			candidates_2nd_order.append(candidate_phrase)
 66 | 		else:
 67 | 			candidates_3nd_order.append(candidate_phrase)
 68 | 	
 69 | 	return candidates_1st_order, candidates_2nd_order, candidates_3nd_order
 70 | 
 71 | 
 72 | def auto_correct( error_phrase ):
 73 | 	
 74 | 	c1_order, c2_order, c3_order = get_candidates(error_phrase)
 75 | 	# print c1_order, c2_order, c3_order
 76 | 	if c1_order:
 77 | 		return max(c1_order, key=phrase_freq.get )
 78 | 	elif c2_order:
 79 | 		return max(c2_order, key=phrase_freq.get )
 80 | 	else:
 81 | 		return max(c3_order, key=phrase_freq.get )
 82 | 
 83 | def auto_correct_sentence( error_sentence, verbose=True):
 84 | 	
 85 | 	jieba_cut = jieba.cut( error_sentence.decode("utf-8"), cut_all=False)
 86 | 	seg_list = "\t".join(jieba_cut).split("\t")
 87 | 	
 88 | 	correct_sentence = ""
 89 | 	
 90 | 	for phrase in seg_list:
 91 | 		
 92 | 		correct_phrase = phrase
 93 | 		# check if item is a punctuation
 94 | 		if phrase not in PUNCTUATION_LIST.decode("utf-8"):
 95 | 			# check if the phrase in our dict, if not then it is a misspelled phrase
 96 | 			if phrase.encode("utf-8") not in phrase_freq.keys():
 97 | 				correct_phrase = auto_correct(phrase.encode("utf-8"))
 98 | 				if verbose :
 99 | 					print phrase, correct_phrase
100 | 	
101 | 		correct_sentence += correct_phrase
102 | 
103 | 	return correct_sentence
104 | 
105 | 
106 | 
# Phrase -> frequency table shared by known(), auto_correct() and
# auto_correct_sentence(). It is built once, when the module is loaded,
# from FILE_PATH. known() and auto_correct_sentence() UTF-8 encode a phrase
# before looking it up here.
phrase_freq = construct_dict( FILE_PATH )
108 | 
109 | def main():
110 | 
111 | 	err_sent_1 = '机七学习是人工智能领遇最能体现智能的一个分知！'
112 | 	print "Test case 1:"
113 | 	correct_sent = auto_correct_sentence( err_sent_1 )
114 | 	print "original sentence:" + err_sent_1 + "\n==>\n" + "corrected sentence:" + correct_sent
115 | 
116 | 	err_sent_2 = '杭洲是中国的八大古都之一，因风景锈丽，享有"人间天棠"的美誉！'
117 | 	print "Test case 2:"
118 | 	correct_sent = auto_correct_sentence( err_sent_2 )
119 | 	print "original sentence:" + err_sent_2 + "\n==>\n" + "corrected sentence:" + correct_sent
120 | 	
# Script entry point: only runs when the file is executed directly.
if __name__=="__main__":
	# Python 2 only: make UTF-8 the default codec for implicit
	# str <-> unicode conversions before running the demo. sys is reloaded
	# first so that setdefaultencoding can be called on it.
	# NOTE(review): this is skipped when the module is imported, so callers
	# importing auto_correct_sentence() get the interpreter's stock default
	# encoding -- confirm that is intended.
	reload(sys)
	sys.setdefaultencoding('utf-8')
	main()
125 | 
126 | 


--------------------------------------------------------------------------------
/cn_dict.txt:
--------------------------------------------------------------------------------
   1 | 一
   2 | 乙
   3 | 二
   4 | 十
   5 | 丁
   6 | 厂
   7 | 七
   8 | 卜
   9 | 人
  10 | 入
  11 | 八
  12 | 九
  13 | 几
  14 | 儿
  15 | 了
  16 | 力
  17 | 乃
  18 | 刀
  19 | 又
  20 | 三
  21 | 于
  22 | 干
  23 | 亏
  24 | 士
  25 | 工
  26 | 土
  27 | 才
  28 | 寸
  29 | 下
  30 | 大
  31 | 丈
  32 | 与
  33 | 万
  34 | 上
  35 | 小
  36 | 口
  37 | 巾
  38 | 山
  39 | 千
  40 | 乞
  41 | 川
  42 | 亿
  43 | 个
  44 | 勺
  45 | 久
  46 | 凡
  47 | 及
  48 | 夕
  49 | 丸
  50 | 么
  51 | 广
  52 | 亡
  53 | 门
  54 | 义
  55 | 之
  56 | 尸
  57 | 弓
  58 | 己
  59 | 已
  60 | 子
  61 | 卫
  62 | 也
  63 | 女
  64 | 飞
  65 | 刃
  66 | 习
  67 | 叉
  68 | 马
  69 | 乡
  70 | 丰
  71 | 王
  72 | 井
  73 | 开
  74 | 夫
  75 | 天
  76 | 无
  77 | 元
  78 | 专
  79 | 云
  80 | 扎
  81 | 艺
  82 | 木
  83 | 五
  84 | 支
  85 | 厅
  86 | 不
  87 | 太
  88 | 犬
  89 | 区
  90 | 历
  91 | 尤
  92 | 友
  93 | 匹
  94 | 车
  95 | 巨
  96 | 牙
  97 | 屯
  98 | 比
  99 | 互
 100 | 切
 101 | 瓦
 102 | 止
 103 | 少
 104 | 日
 105 | 中
 106 | 冈
 107 | 贝
 108 | 内
 109 | 水
 110 | 见
 111 | 午
 112 | 牛
 113 | 手
 114 | 毛
 115 | 气
 116 | 升
 117 | 长
 118 | 仁
 119 | 什
 120 | 片
 121 | 仆
 122 | 化
 123 | 仇
 124 | 币
 125 | 仍
 126 | 仅
 127 | 斤
 128 | 爪
 129 | 反
 130 | 介
 131 | 父
 132 | 从
 133 | 今
 134 | 凶
 135 | 分
 136 | 乏
 137 | 公
 138 | 仓
 139 | 月
 140 | 氏
 141 | 勿
 142 | 欠
 143 | 风
 144 | 丹
 145 | 匀
 146 | 乌
 147 | 凤
 148 | 勾
 149 | 文
 150 | 六
 151 | 方
 152 | 火
 153 | 为
 154 | 斗
 155 | 忆
 156 | 订
 157 | 计
 158 | 户
 159 | 认
 160 | 心
 161 | 尺
 162 | 引
 163 | 丑
 164 | 巴
 165 | 孔
 166 | 队
 167 | 办
 168 | 以
 169 | 允
 170 | 予
 171 | 劝
 172 | 双
 173 | 书
 174 | 幻
 175 | 玉
 176 | 刊
 177 | 示
 178 | 末
 179 | 未
 180 | 击
 181 | 打
 182 | 巧
 183 | 正
 184 | 扑
 185 | 扒
 186 | 功
 187 | 扔
 188 | 去
 189 | 甘
 190 | 世
 191 | 古
 192 | 节
 193 | 本
 194 | 术
 195 | 可
 196 | 丙
 197 | 左
 198 | 厉
 199 | 右
 200 | 石
 201 | 布
 202 | 龙
 203 | 平
 204 | 灭
 205 | 轧
 206 | 东
 207 | 卡
 208 | 北
 209 | 占
 210 | 业
 211 | 旧
 212 | 帅
 213 | 归
 214 | 且
 215 | 旦
 216 | 目
 217 | 叶
 218 | 甲
 219 | 申
 220 | 叮
 221 | 电
 222 | 号
 223 | 田
 224 | 由
 225 | 史
 226 | 只
 227 | 央
 228 | 兄
 229 | 叼
 230 | 叫
 231 | 另
 232 | 叨
 233 | 叹
 234 | 四
 235 | 生
 236 | 失
 237 | 禾
 238 | 丘
 239 | 付
 240 | 仗
 241 | 代
 242 | 仙
 243 | 们
 244 | 仪
 245 | 白
 246 | 仔
 247 | 他
 248 | 斥
 249 | 瓜
 250 | 乎
 251 | 丛
 252 | 令
 253 | 用
 254 | 甩
 255 | 印
 256 | 乐
 257 | 句
 258 | 匆
 259 | 册
 260 | 犯
 261 | 外
 262 | 处
 263 | 冬
 264 | 鸟
 265 | 务
 266 | 包
 267 | 饥
 268 | 主
 269 | 市
 270 | 立
 271 | 闪
 272 | 兰
 273 | 半
 274 | 汁
 275 | 汇
 276 | 头
 277 | 汉
 278 | 宁
 279 | 穴
 280 | 它
 281 | 讨
 282 | 写
 283 | 让
 284 | 礼
 285 | 训
 286 | 必
 287 | 议
 288 | 讯
 289 | 记
 290 | 永
 291 | 司
 292 | 尼
 293 | 民
 294 | 出
 295 | 辽
 296 | 奶
 297 | 奴
 298 | 加
 299 | 召
 300 | 皮
 301 | 边
 302 | 发
 303 | 孕
 304 | 圣
 305 | 对
 306 | 台
 307 | 矛
 308 | 纠
 309 | 母
 310 | 幼
 311 | 丝
 312 | 式
 313 | 刑
 314 | 动
 315 | 扛
 316 | 寺
 317 | 吉
 318 | 扣
 319 | 考
 320 | 托
 321 | 老
 322 | 执
 323 | 巩
 324 | 圾
 325 | 扩
 326 | 扫
 327 | 地
 328 | 扬
 329 | 场
 330 | 耳
 331 | 共
 332 | 芒
 333 | 亚
 334 | 芝
 335 | 朽
 336 | 朴
 337 | 机
 338 | 权
 339 | 过
 340 | 臣
 341 | 再
 342 | 协
 343 | 西
 344 | 压
 345 | 厌
 346 | 在
 347 | 有
 348 | 百
 349 | 存
 350 | 而
 351 | 页
 352 | 匠
 353 | 夸
 354 | 夺
 355 | 灰
 356 | 达
 357 | 列
 358 | 死
 359 | 成
 360 | 夹
 361 | 轨
 362 | 邪
 363 | 划
 364 | 迈
 365 | 毕
 366 | 至
 367 | 此
 368 | 贞
 369 | 师
 370 | 尘
 371 | 尖
 372 | 劣
 373 | 光
 374 | 当
 375 | 早
 376 | 吐
 377 | 吓
 378 | 虫
 379 | 曲
 380 | 团
 381 | 同
 382 | 吊
 383 | 吃
 384 | 因
 385 | 吸
 386 | 吗
 387 | 屿
 388 | 帆
 389 | 岁
 390 | 回
 391 | 岂
 392 | 刚
 393 | 则
 394 | 肉
 395 | 网
 396 | 年
 397 | 朱
 398 | 先
 399 | 丢
 400 | 舌
 401 | 竹
 402 | 迁
 403 | 乔
 404 | 伟
 405 | 传
 406 | 乒
 407 | 乓
 408 | 休
 409 | 伍
 410 | 伏
 411 | 优
 412 | 伐
 413 | 延
 414 | 件
 415 | 任
 416 | 伤
 417 | 价
 418 | 份
 419 | 华
 420 | 仰
 421 | 仿
 422 | 伙
 423 | 伪
 424 | 自
 425 | 血
 426 | 向
 427 | 似
 428 | 后
 429 | 行
 430 | 舟
 431 | 全
 432 | 会
 433 | 杀
 434 | 合
 435 | 兆
 436 | 企
 437 | 众
 438 | 爷
 439 | 伞
 440 | 创
 441 | 肌
 442 | 朵
 443 | 杂
 444 | 危
 445 | 旬
 446 | 旨
 447 | 负
 448 | 各
 449 | 名
 450 | 多
 451 | 争
 452 | 色
 453 | 壮
 454 | 冲
 455 | 冰
 456 | 庄
 457 | 庆
 458 | 亦
 459 | 刘
 460 | 齐
 461 | 交
 462 | 次
 463 | 衣
 464 | 产
 465 | 决
 466 | 充
 467 | 妄
 468 | 闭
 469 | 问
 470 | 闯
 471 | 羊
 472 | 并
 473 | 关
 474 | 米
 475 | 灯
 476 | 州
 477 | 汗
 478 | 污
 479 | 江
 480 | 池
 481 | 汤
 482 | 忙
 483 | 兴
 484 | 宇
 485 | 守
 486 | 宅
 487 | 字
 488 | 安
 489 | 讲
 490 | 军
 491 | 许
 492 | 论
 493 | 农
 494 | 讽
 495 | 设
 496 | 访
 497 | 寻
 498 | 那
 499 | 迅
 500 | 尽
 501 | 导
 502 | 异
 503 | 孙
 504 | 阵
 505 | 阳
 506 | 收
 507 | 阶
 508 | 阴
 509 | 防
 510 | 奸
 511 | 如
 512 | 妇
 513 | 好
 514 | 她
 515 | 妈
 516 | 戏
 517 | 羽
 518 | 观
 519 | 欢
 520 | 买
 521 | 红
 522 | 纤
 523 | 级
 524 | 约
 525 | 纪
 526 | 驰
 527 | 巡
 528 | 寿
 529 | 弄
 530 | 麦
 531 | 形
 532 | 进
 533 | 戒
 534 | 吞
 535 | 远
 536 | 违
 537 | 运
 538 | 扶
 539 | 抚
 540 | 坛
 541 | 技
 542 | 坏
 543 | 扰
 544 | 拒
 545 | 找
 546 | 批
 547 | 扯
 548 | 址
 549 | 走
 550 | 抄
 551 | 坝
 552 | 贡
 553 | 攻
 554 | 赤
 555 | 折
 556 | 抓
 557 | 扮
 558 | 抢
 559 | 孝
 560 | 均
 561 | 抛
 562 | 投
 563 | 坟
 564 | 抗
 565 | 坑
 566 | 坊
 567 | 抖
 568 | 护
 569 | 壳
 570 | 志
 571 | 扭
 572 | 块
 573 | 声
 574 | 把
 575 | 报
 576 | 却
 577 | 劫
 578 | 芽
 579 | 花
 580 | 芹
 581 | 芬
 582 | 苍
 583 | 芳
 584 | 严
 585 | 芦
 586 | 劳
 587 | 克
 588 | 苏
 589 | 杆
 590 | 杠
 591 | 杜
 592 | 材
 593 | 村
 594 | 杏
 595 | 极
 596 | 李
 597 | 杨
 598 | 求
 599 | 更
 600 | 束
 601 | 豆
 602 | 两
 603 | 丽
 604 | 医
 605 | 辰
 606 | 励
 607 | 否
 608 | 还
 609 | 歼
 610 | 来
 611 | 连
 612 | 步
 613 | 坚
 614 | 旱
 615 | 盯
 616 | 呈
 617 | 时
 618 | 吴
 619 | 助
 620 | 县
 621 | 里
 622 | 呆
 623 | 园
 624 | 旷
 625 | 围
 626 | 呀
 627 | 吨
 628 | 足
 629 | 邮
 630 | 男
 631 | 困
 632 | 吵
 633 | 串
 634 | 员
 635 | 听
 636 | 吩
 637 | 吹
 638 | 呜
 639 | 吧
 640 | 吼
 641 | 别
 642 | 岗
 643 | 帐
 644 | 财
 645 | 针
 646 | 钉
 647 | 告
 648 | 我
 649 | 乱
 650 | 利
 651 | 秃
 652 | 秀
 653 | 私
 654 | 每
 655 | 兵
 656 | 估
 657 | 体
 658 | 何
 659 | 但
 660 | 伸
 661 | 作
 662 | 伯
 663 | 伶
 664 | 佣
 665 | 低
 666 | 你
 667 | 住
 668 | 位
 669 | 伴
 670 | 身
 671 | 皂
 672 | 佛
 673 | 近
 674 | 彻
 675 | 役
 676 | 返
 677 | 余
 678 | 希
 679 | 坐
 680 | 谷
 681 | 妥
 682 | 含
 683 | 邻
 684 | 岔
 685 | 肝
 686 | 肚
 687 | 肠
 688 | 龟
 689 | 免
 690 | 狂
 691 | 犹
 692 | 角
 693 | 删
 694 | 条
 695 | 卵
 696 | 岛
 697 | 迎
 698 | 饭
 699 | 饮
 700 | 系
 701 | 言
 702 | 冻
 703 | 状
 704 | 亩
 705 | 况
 706 | 床
 707 | 库
 708 | 疗
 709 | 应
 710 | 冷
 711 | 这
 712 | 序
 713 | 辛
 714 | 弃
 715 | 冶
 716 | 忘
 717 | 闲
 718 | 间
 719 | 闷
 720 | 判
 721 | 灶
 722 | 灿
 723 | 弟
 724 | 汪
 725 | 沙
 726 | 汽
 727 | 沃
 728 | 泛
 729 | 沟
 730 | 没
 731 | 沈
 732 | 沉
 733 | 怀
 734 | 忧
 735 | 快
 736 | 完
 737 | 宋
 738 | 宏
 739 | 牢
 740 | 究
 741 | 穷
 742 | 灾
 743 | 良
 744 | 证
 745 | 启
 746 | 评
 747 | 补
 748 | 初
 749 | 社
 750 | 识
 751 | 诉
 752 | 诊
 753 | 词
 754 | 译
 755 | 君
 756 | 灵
 757 | 即
 758 | 层
 759 | 尿
 760 | 尾
 761 | 迟
 762 | 局
 763 | 改
 764 | 张
 765 | 忌
 766 | 际
 767 | 陆
 768 | 阿
 769 | 陈
 770 | 阻
 771 | 附
 772 | 妙
 773 | 妖
 774 | 妨
 775 | 努
 776 | 忍
 777 | 劲
 778 | 鸡
 779 | 驱
 780 | 纯
 781 | 纱
 782 | 纳
 783 | 纲
 784 | 驳
 785 | 纵
 786 | 纷
 787 | 纸
 788 | 纹
 789 | 纺
 790 | 驴
 791 | 纽
 792 | 奉
 793 | 玩
 794 | 环
 795 | 武
 796 | 青
 797 | 责
 798 | 现
 799 | 表
 800 | 规
 801 | 抹
 802 | 拢
 803 | 拔
 804 | 拣
 805 | 担
 806 | 坦
 807 | 押
 808 | 抽
 809 | 拐
 810 | 拖
 811 | 拍
 812 | 者
 813 | 顶
 814 | 拆
 815 | 拥
 816 | 抵
 817 | 拘
 818 | 势
 819 | 抱
 820 | 垃
 821 | 拉
 822 | 拦
 823 | 拌
 824 | 幸
 825 | 招
 826 | 坡
 827 | 披
 828 | 拨
 829 | 择
 830 | 抬
 831 | 其
 832 | 取
 833 | 苦
 834 | 若
 835 | 茂
 836 | 苹
 837 | 苗
 838 | 英
 839 | 范
 840 | 直
 841 | 茄
 842 | 茎
 843 | 茅
 844 | 林
 845 | 枝
 846 | 杯
 847 | 柜
 848 | 析
 849 | 板
 850 | 松
 851 | 枪
 852 | 构
 853 | 杰
 854 | 述
 855 | 枕
 856 | 丧
 857 | 或
 858 | 画
 859 | 卧
 860 | 事
 861 | 刺
 862 | 枣
 863 | 雨
 864 | 卖
 865 | 矿
 866 | 码
 867 | 厕
 868 | 奔
 869 | 奇
 870 | 奋
 871 | 态
 872 | 欧
 873 | 垄
 874 | 妻
 875 | 轰
 876 | 顷
 877 | 转
 878 | 斩
 879 | 轮
 880 | 软
 881 | 到
 882 | 非
 883 | 叔
 884 | 肯
 885 | 齿
 886 | 些
 887 | 虎
 888 | 虏
 889 | 肾
 890 | 贤
 891 | 尚
 892 | 旺
 893 | 具
 894 | 果
 895 | 味
 896 | 昆
 897 | 国
 898 | 昌
 899 | 畅
 900 | 明
 901 | 易
 902 | 昂
 903 | 典
 904 | 固
 905 | 忠
 906 | 咐
 907 | 呼
 908 | 鸣
 909 | 咏
 910 | 呢
 911 | 岸
 912 | 岩
 913 | 帖
 914 | 罗
 915 | 帜
 916 | 岭
 917 | 凯
 918 | 败
 919 | 贩
 920 | 购
 921 | 图
 922 | 钓
 923 | 制
 924 | 知
 925 | 垂
 926 | 牧
 927 | 物
 928 | 乖
 929 | 刮
 930 | 秆
 931 | 和
 932 | 季
 933 | 委
 934 | 佳
 935 | 侍
 936 | 供
 937 | 使
 938 | 例
 939 | 版
 940 | 侄
 941 | 侦
 942 | 侧
 943 | 凭
 944 | 侨
 945 | 佩
 946 | 货
 947 | 依
 948 | 的
 949 | 迫
 950 | 质
 951 | 欣
 952 | 征
 953 | 往
 954 | 爬
 955 | 彼
 956 | 径
 957 | 所
 958 | 舍
 959 | 金
 960 | 命
 961 | 斧
 962 | 爸
 963 | 采
 964 | 受
 965 | 乳
 966 | 贪
 967 | 念
 968 | 贫
 969 | 肤
 970 | 肺
 971 | 肢
 972 | 肿
 973 | 胀
 974 | 朋
 975 | 股
 976 | 肥
 977 | 服
 978 | 胁
 979 | 周
 980 | 昏
 981 | 鱼
 982 | 兔
 983 | 狐
 984 | 忽
 985 | 狗
 986 | 备
 987 | 饰
 988 | 饱
 989 | 饲
 990 | 变
 991 | 京
 992 | 享
 993 | 店
 994 | 夜
 995 | 庙
 996 | 府
 997 | 底
 998 | 剂
 999 | 郊
1000 | 废
1001 | 净
1002 | 盲
1003 | 放
1004 | 刻
1005 | 育
1006 | 闸
1007 | 闹
1008 | 郑
1009 | 券
1010 | 卷
1011 | 单
1012 | 炒
1013 | 炊
1014 | 炕
1015 | 炎
1016 | 炉
1017 | 沫
1018 | 浅
1019 | 法
1020 | 泄
1021 | 河
1022 | 沾
1023 | 泪
1024 | 油
1025 | 泊
1026 | 沿
1027 | 泡
1028 | 注
1029 | 泻
1030 | 泳
1031 | 泥
1032 | 沸
1033 | 波
1034 | 泼
1035 | 泽
1036 | 治
1037 | 怖
1038 | 性
1039 | 怕
1040 | 怜
1041 | 怪
1042 | 学
1043 | 宝
1044 | 宗
1045 | 定
1046 | 宜
1047 | 审
1048 | 宙
1049 | 官
1050 | 空
1051 | 帘
1052 | 实
1053 | 试
1054 | 郎
1055 | 诗
1056 | 肩
1057 | 房
1058 | 诚
1059 | 衬
1060 | 衫
1061 | 视
1062 | 话
1063 | 诞
1064 | 询
1065 | 该
1066 | 详
1067 | 建
1068 | 肃
1069 | 录
1070 | 隶
1071 | 居
1072 | 届
1073 | 刷
1074 | 屈
1075 | 弦
1076 | 承
1077 | 孟
1078 | 孤
1079 | 陕
1080 | 降
1081 | 限
1082 | 妹
1083 | 姑
1084 | 姐
1085 | 姓
1086 | 始
1087 | 驾
1088 | 参
1089 | 艰
1090 | 线
1091 | 练
1092 | 组
1093 | 细
1094 | 驶
1095 | 织
1096 | 终
1097 | 驻
1098 | 驼
1099 | 绍
1100 | 经
1101 | 贯
1102 | 奏
1103 | 春
1104 | 帮
1105 | 珍
1106 | 玻
1107 | 毒
1108 | 型
1109 | 挂
1110 | 封
1111 | 持
1112 | 项
1113 | 垮
1114 | 挎
1115 | 城
1116 | 挠
1117 | 政
1118 | 赴
1119 | 赵
1120 | 挡
1121 | 挺
1122 | 括
1123 | 拴
1124 | 拾
1125 | 挑
1126 | 指
1127 | 垫
1128 | 挣
1129 | 挤
1130 | 拼
1131 | 挖
1132 | 按
1133 | 挥
1134 | 挪
1135 | 某
1136 | 甚
1137 | 革
1138 | 荐
1139 | 巷
1140 | 带
1141 | 草
1142 | 茧
1143 | 茶
1144 | 荒
1145 | 茫
1146 | 荡
1147 | 荣
1148 | 故
1149 | 胡
1150 | 南
1151 | 药
1152 | 标
1153 | 枯
1154 | 柄
1155 | 栋
1156 | 相
1157 | 查
1158 | 柏
1159 | 柳
1160 | 柱
1161 | 柿
1162 | 栏
1163 | 树
1164 | 要
1165 | 咸
1166 | 威
1167 | 歪
1168 | 研
1169 | 砖
1170 | 厘
1171 | 厚
1172 | 砌
1173 | 砍
1174 | 面
1175 | 耐
1176 | 耍
1177 | 牵
1178 | 残
1179 | 殃
1180 | 轻
1181 | 鸦
1182 | 皆
1183 | 背
1184 | 战
1185 | 点
1186 | 临
1187 | 览
1188 | 竖
1189 | 省
1190 | 削
1191 | 尝
1192 | 是
1193 | 盼
1194 | 眨
1195 | 哄
1196 | 显
1197 | 哑
1198 | 冒
1199 | 映
1200 | 星
1201 | 昨
1202 | 畏
1203 | 趴
1204 | 胃
1205 | 贵
1206 | 界
1207 | 虹
1208 | 虾
1209 | 蚁
1210 | 思
1211 | 蚂
1212 | 虽
1213 | 品
1214 | 咽
1215 | 骂
1216 | 哗
1217 | 咱
1218 | 响
1219 | 哈
1220 | 咬
1221 | 咳
1222 | 哪
1223 | 炭
1224 | 峡
1225 | 罚
1226 | 贱
1227 | 贴
1228 | 骨
1229 | 钞
1230 | 钟
1231 | 钢
1232 | 钥
1233 | 钩
1234 | 卸
1235 | 缸
1236 | 拜
1237 | 看
1238 | 矩
1239 | 怎
1240 | 牲
1241 | 选
1242 | 适
1243 | 秒
1244 | 香
1245 | 种
1246 | 秋
1247 | 科
1248 | 重
1249 | 复
1250 | 竿
1251 | 段
1252 | 便
1253 | 俩
1254 | 贷
1255 | 顺
1256 | 修
1257 | 保
1258 | 促
1259 | 侮
1260 | 俭
1261 | 俗
1262 | 俘
1263 | 信
1264 | 皇
1265 | 泉
1266 | 鬼
1267 | 侵
1268 | 追
1269 | 俊
1270 | 盾
1271 | 待
1272 | 律
1273 | 很
1274 | 须
1275 | 叙
1276 | 剑
1277 | 逃
1278 | 食
1279 | 盆
1280 | 胆
1281 | 胜
1282 | 胞
1283 | 胖
1284 | 脉
1285 | 勉
1286 | 狭
1287 | 狮
1288 | 独
1289 | 狡
1290 | 狱
1291 | 狠
1292 | 贸
1293 | 怨
1294 | 急
1295 | 饶
1296 | 蚀
1297 | 饺
1298 | 饼
1299 | 弯
1300 | 将
1301 | 奖
1302 | 哀
1303 | 亭
1304 | 亮
1305 | 度
1306 | 迹
1307 | 庭
1308 | 疮
1309 | 疯
1310 | 疫
1311 | 疤
1312 | 姿
1313 | 亲
1314 | 音
1315 | 帝
1316 | 施
1317 | 闻
1318 | 阀
1319 | 阁
1320 | 差
1321 | 养
1322 | 美
1323 | 姜
1324 | 叛
1325 | 送
1326 | 类
1327 | 迷
1328 | 前
1329 | 首
1330 | 逆
1331 | 总
1332 | 炼
1333 | 炸
1334 | 炮
1335 | 烂
1336 | 剃
1337 | 洁
1338 | 洪
1339 | 洒
1340 | 浇
1341 | 浊
1342 | 洞
1343 | 测
1344 | 洗
1345 | 活
1346 | 派
1347 | 洽
1348 | 染
1349 | 济
1350 | 洋
1351 | 洲
1352 | 浑
1353 | 浓
1354 | 津
1355 | 恒
1356 | 恢
1357 | 恰
1358 | 恼
1359 | 恨
1360 | 举
1361 | 觉
1362 | 宣
1363 | 室
1364 | 宫
1365 | 宪
1366 | 突
1367 | 穿
1368 | 窃
1369 | 客
1370 | 冠
1371 | 语
1372 | 扁
1373 | 袄
1374 | 祖
1375 | 神
1376 | 祝
1377 | 误
1378 | 诱
1379 | 说
1380 | 诵
1381 | 垦
1382 | 退
1383 | 既
1384 | 屋
1385 | 昼
1386 | 费
1387 | 陡
1388 | 眉
1389 | 孩
1390 | 除
1391 | 险
1392 | 院
1393 | 娃
1394 | 姥
1395 | 姨
1396 | 姻
1397 | 娇
1398 | 怒
1399 | 架
1400 | 贺
1401 | 盈
1402 | 勇
1403 | 怠
1404 | 柔
1405 | 垒
1406 | 绑
1407 | 绒
1408 | 结
1409 | 绕
1410 | 骄
1411 | 绘
1412 | 给
1413 | 络
1414 | 骆
1415 | 绝
1416 | 绞
1417 | 统
1418 | 耕
1419 | 耗
1420 | 艳
1421 | 泰
1422 | 珠
1423 | 班
1424 | 素
1425 | 蚕
1426 | 顽
1427 | 盏
1428 | 匪
1429 | 捞
1430 | 栽
1431 | 捕
1432 | 振
1433 | 载
1434 | 赶
1435 | 起
1436 | 盐
1437 | 捎
1438 | 捏
1439 | 埋
1440 | 捉
1441 | 捆
1442 | 捐
1443 | 损
1444 | 都
1445 | 哲
1446 | 逝
1447 | 捡
1448 | 换
1449 | 挽
1450 | 热
1451 | 恐
1452 | 壶
1453 | 挨
1454 | 耻
1455 | 耽
1456 | 恭
1457 | 莲
1458 | 莫
1459 | 荷
1460 | 获
1461 | 晋
1462 | 恶
1463 | 真
1464 | 框
1465 | 桂
1466 | 档
1467 | 桐
1468 | 株
1469 | 桥
1470 | 桃
1471 | 格
1472 | 校
1473 | 核
1474 | 样
1475 | 根
1476 | 索
1477 | 哥
1478 | 速
1479 | 逗
1480 | 栗
1481 | 配
1482 | 翅
1483 | 辱
1484 | 唇
1485 | 夏
1486 | 础
1487 | 破
1488 | 原
1489 | 套
1490 | 逐
1491 | 烈
1492 | 殊
1493 | 顾
1494 | 轿
1495 | 较
1496 | 顿
1497 | 毙
1498 | 致
1499 | 柴
1500 | 桌
1501 | 虑
1502 | 监
1503 | 紧
1504 | 党
1505 | 晒
1506 | 眠
1507 | 晓
1508 | 鸭
1509 | 晃
1510 | 晌
1511 | 晕
1512 | 蚊
1513 | 哨
1514 | 哭
1515 | 恩
1516 | 唤
1517 | 啊
1518 | 唉
1519 | 罢
1520 | 峰
1521 | 圆
1522 | 贼
1523 | 贿
1524 | 钱
1525 | 钳
1526 | 钻
1527 | 铁
1528 | 铃
1529 | 铅
1530 | 缺
1531 | 氧
1532 | 特
1533 | 牺
1534 | 造
1535 | 乘
1536 | 敌
1537 | 秤
1538 | 租
1539 | 积
1540 | 秧
1541 | 秩
1542 | 称
1543 | 秘
1544 | 透
1545 | 笔
1546 | 笑
1547 | 笋
1548 | 债
1549 | 借
1550 | 值
1551 | 倚
1552 | 倾
1553 | 倒
1554 | 倘
1555 | 俱
1556 | 倡
1557 | 候
1558 | 俯
1559 | 倍
1560 | 倦
1561 | 健
1562 | 臭
1563 | 射
1564 | 躬
1565 | 息
1566 | 徒
1567 | 徐
1568 | 舰
1569 | 舱
1570 | 般
1571 | 航
1572 | 途
1573 | 拿
1574 | 爹
1575 | 爱
1576 | 颂
1577 | 翁
1578 | 脆
1579 | 脂
1580 | 胸
1581 | 胳
1582 | 脏
1583 | 胶
1584 | 脑
1585 | 狸
1586 | 狼
1587 | 逢
1588 | 留
1589 | 皱
1590 | 饿
1591 | 恋
1592 | 桨
1593 | 浆
1594 | 衰
1595 | 高
1596 | 席
1597 | 准
1598 | 座
1599 | 脊
1600 | 症
1601 | 病
1602 | 疾
1603 | 疼
1604 | 疲
1605 | 效
1606 | 离
1607 | 唐
1608 | 资
1609 | 凉
1610 | 站
1611 | 剖
1612 | 竞
1613 | 部
1614 | 旁
1615 | 旅
1616 | 畜
1617 | 阅
1618 | 羞
1619 | 瓶
1620 | 拳
1621 | 粉
1622 | 料
1623 | 益
1624 | 兼
1625 | 烤
1626 | 烘
1627 | 烦
1628 | 烧
1629 | 烛
1630 | 烟
1631 | 递
1632 | 涛
1633 | 浙
1634 | 涝
1635 | 酒
1636 | 涉
1637 | 消
1638 | 浩
1639 | 海
1640 | 涂
1641 | 浴
1642 | 浮
1643 | 流
1644 | 润
1645 | 浪
1646 | 浸
1647 | 涨
1648 | 烫
1649 | 涌
1650 | 悟
1651 | 悄
1652 | 悔
1653 | 悦
1654 | 害
1655 | 宽
1656 | 家
1657 | 宵
1658 | 宴
1659 | 宾
1660 | 窄
1661 | 容
1662 | 宰
1663 | 案
1664 | 请
1665 | 朗
1666 | 诸
1667 | 读
1668 | 扇
1669 | 袜
1670 | 袖
1671 | 袍
1672 | 被
1673 | 祥
1674 | 课
1675 | 谁
1676 | 调
1677 | 冤
1678 | 谅
1679 | 谈
1680 | 谊
1681 | 剥
1682 | 恳
1683 | 展
1684 | 剧
1685 | 屑
1686 | 弱
1687 | 陵
1688 | 陶
1689 | 陷
1690 | 陪
1691 | 娱
1692 | 娘
1693 | 通
1694 | 能
1695 | 难
1696 | 预
1697 | 桑
1698 | 绢
1699 | 绣
1700 | 验
1701 | 继
1702 | 球
1703 | 理
1704 | 捧
1705 | 堵
1706 | 描
1707 | 域
1708 | 掩
1709 | 捷
1710 | 排
1711 | 掉
1712 | 堆
1713 | 推
1714 | 掀
1715 | 授
1716 | 教
1717 | 掏
1718 | 掠
1719 | 培
1720 | 接
1721 | 控
1722 | 探
1723 | 据
1724 | 掘
1725 | 职
1726 | 基
1727 | 著
1728 | 勒
1729 | 黄
1730 | 萌
1731 | 萝
1732 | 菌
1733 | 菜
1734 | 萄
1735 | 菊
1736 | 萍
1737 | 菠
1738 | 营
1739 | 械
1740 | 梦
1741 | 梢
1742 | 梅
1743 | 检
1744 | 梳
1745 | 梯
1746 | 桶
1747 | 救
1748 | 副
1749 | 票
1750 | 戚
1751 | 爽
1752 | 聋
1753 | 袭
1754 | 盛
1755 | 雪
1756 | 辅
1757 | 辆
1758 | 虚
1759 | 雀
1760 | 堂
1761 | 常
1762 | 匙
1763 | 晨
1764 | 睁
1765 | 眯
1766 | 眼
1767 | 悬
1768 | 野
1769 | 啦
1770 | 晚
1771 | 啄
1772 | 距
1773 | 跃
1774 | 略
1775 | 蛇
1776 | 累
1777 | 唱
1778 | 患
1779 | 唯
1780 | 崖
1781 | 崭
1782 | 崇
1783 | 圈
1784 | 铜
1785 | 铲
1786 | 银
1787 | 甜
1788 | 梨
1789 | 犁
1790 | 移
1791 | 笨
1792 | 笼
1793 | 笛
1794 | 符
1795 | 第
1796 | 敏
1797 | 做
1798 | 袋
1799 | 悠
1800 | 偿
1801 | 偶
1802 | 偷
1803 | 您
1804 | 售
1805 | 停
1806 | 偏
1807 | 假
1808 | 得
1809 | 衔
1810 | 盘
1811 | 船
1812 | 斜
1813 | 盒
1814 | 鸽
1815 | 悉
1816 | 欲
1817 | 彩
1818 | 领
1819 | 脚
1820 | 脖
1821 | 脸
1822 | 脱
1823 | 象
1824 | 够
1825 | 猜
1826 | 猪
1827 | 猎
1828 | 猫
1829 | 猛
1830 | 馅
1831 | 馆
1832 | 凑
1833 | 减
1834 | 毫
1835 | 麻
1836 | 痒
1837 | 痕
1838 | 廊
1839 | 康
1840 | 庸
1841 | 鹿
1842 | 盗
1843 | 章
1844 | 竟
1845 | 商
1846 | 族
1847 | 旋
1848 | 望
1849 | 率
1850 | 着
1851 | 盖
1852 | 粘
1853 | 粗
1854 | 粒
1855 | 断
1856 | 剪
1857 | 兽
1858 | 清
1859 | 添
1860 | 淋
1861 | 淹
1862 | 渠
1863 | 渐
1864 | 混
1865 | 渔
1866 | 淘
1867 | 液
1868 | 淡
1869 | 深
1870 | 婆
1871 | 梁
1872 | 渗
1873 | 情
1874 | 惜
1875 | 惭
1876 | 悼
1877 | 惧
1878 | 惕
1879 | 惊
1880 | 惨
1881 | 惯
1882 | 寇
1883 | 寄
1884 | 宿
1885 | 窑
1886 | 密
1887 | 谋
1888 | 谎
1889 | 祸
1890 | 谜
1891 | 逮
1892 | 敢
1893 | 屠
1894 | 弹
1895 | 随
1896 | 蛋
1897 | 隆
1898 | 隐
1899 | 婚
1900 | 婶
1901 | 颈
1902 | 绩
1903 | 绪
1904 | 续
1905 | 骑
1906 | 绳
1907 | 维
1908 | 绵
1909 | 绸
1910 | 绿
1911 | 琴
1912 | 斑
1913 | 替
1914 | 款
1915 | 堪
1916 | 搭
1917 | 塔
1918 | 越
1919 | 趁
1920 | 趋
1921 | 超
1922 | 提
1923 | 堤
1924 | 博
1925 | 揭
1926 | 喜
1927 | 插
1928 | 揪
1929 | 搜
1930 | 煮
1931 | 援
1932 | 裁
1933 | 搁
1934 | 搂
1935 | 搅
1936 | 握
1937 | 揉
1938 | 斯
1939 | 期
1940 | 欺
1941 | 联
1942 | 散
1943 | 惹
1944 | 葬
1945 | 葛
1946 | 董
1947 | 葡
1948 | 敬
1949 | 葱
1950 | 落
1951 | 朝
1952 | 辜
1953 | 葵
1954 | 棒
1955 | 棋
1956 | 植
1957 | 森
1958 | 椅
1959 | 椒
1960 | 棵
1961 | 棍
1962 | 棉
1963 | 棚
1964 | 棕
1965 | 惠
1966 | 惑
1967 | 逼
1968 | 厨
1969 | 厦
1970 | 硬
1971 | 确
1972 | 雁
1973 | 殖
1974 | 裂
1975 | 雄
1976 | 暂
1977 | 雅
1978 | 辈
1979 | 悲
1980 | 紫
1981 | 辉
1982 | 敞
1983 | 赏
1984 | 掌
1985 | 晴
1986 | 暑
1987 | 最
1988 | 量
1989 | 喷
1990 | 晶
1991 | 喇
1992 | 遇
1993 | 喊
1994 | 景
1995 | 践
1996 | 跌
1997 | 跑
1998 | 遗
1999 | 蛙
2000 | 蛛
2001 | 蜓
2002 | 喝
2003 | 喂
2004 | 喘
2005 | 喉
2006 | 幅
2007 | 帽
2008 | 赌
2009 | 赔
2010 | 黑
2011 | 铸
2012 | 铺
2013 | 链
2014 | 销
2015 | 锁
2016 | 锄
2017 | 锅
2018 | 锈
2019 | 锋
2020 | 锐
2021 | 短
2022 | 智
2023 | 毯
2024 | 鹅
2025 | 剩
2026 | 稍
2027 | 程
2028 | 稀
2029 | 税
2030 | 筐
2031 | 等
2032 | 筑
2033 | 策
2034 | 筛
2035 | 筒
2036 | 答
2037 | 筋
2038 | 筝
2039 | 傲
2040 | 傅
2041 | 牌
2042 | 堡
2043 | 集
2044 | 焦
2045 | 傍
2046 | 储
2047 | 奥
2048 | 街
2049 | 惩
2050 | 御
2051 | 循
2052 | 艇
2053 | 舒
2054 | 番
2055 | 释
2056 | 禽
2057 | 腊
2058 | 脾
2059 | 腔
2060 | 鲁
2061 | 猾
2062 | 猴
2063 | 然
2064 | 馋
2065 | 装
2066 | 蛮
2067 | 就
2068 | 痛
2069 | 童
2070 | 阔
2071 | 善
2072 | 羡
2073 | 普
2074 | 粪
2075 | 尊
2076 | 道
2077 | 曾
2078 | 焰
2079 | 港
2080 | 湖
2081 | 渣
2082 | 湿
2083 | 温
2084 | 渴
2085 | 滑
2086 | 湾
2087 | 渡
2088 | 游
2089 | 滋
2090 | 溉
2091 | 愤
2092 | 慌
2093 | 惰
2094 | 愧
2095 | 愉
2096 | 慨
2097 | 割
2098 | 寒
2099 | 富
2100 | 窜
2101 | 窝
2102 | 窗
2103 | 遍
2104 | 裕
2105 | 裤
2106 | 裙
2107 | 谢
2108 | 谣
2109 | 谦
2110 | 属
2111 | 屡
2112 | 强
2113 | 粥
2114 | 疏
2115 | 隔
2116 | 隙
2117 | 絮
2118 | 嫂
2119 | 登
2120 | 缎
2121 | 缓
2122 | 编
2123 | 骗
2124 | 缘
2125 | 瑞
2126 | 魂
2127 | 肆
2128 | 摄
2129 | 摸
2130 | 填
2131 | 搏
2132 | 塌
2133 | 鼓
2134 | 摆
2135 | 携
2136 | 搬
2137 | 摇
2138 | 搞
2139 | 塘
2140 | 摊
2141 | 蒜
2142 | 勤
2143 | 鹊
2144 | 蓝
2145 | 墓
2146 | 幕
2147 | 蓬
2148 | 蓄
2149 | 蒙
2150 | 蒸
2151 | 献
2152 | 禁
2153 | 楚
2154 | 想
2155 | 槐
2156 | 榆
2157 | 楼
2158 | 概
2159 | 赖
2160 | 酬
2161 | 感
2162 | 碍
2163 | 碑
2164 | 碎
2165 | 碰
2166 | 碗
2167 | 碌
2168 | 雷
2169 | 零
2170 | 雾
2171 | 雹
2172 | 输
2173 | 督
2174 | 龄
2175 | 鉴
2176 | 睛
2177 | 睡
2178 | 睬
2179 | 鄙
2180 | 愚
2181 | 暖
2182 | 盟
2183 | 歇
2184 | 暗
2185 | 照
2186 | 跨
2187 | 跳
2188 | 跪
2189 | 路
2190 | 跟
2191 | 遣
2192 | 蛾
2193 | 蜂
2194 | 嗓
2195 | 置
2196 | 罪
2197 | 罩
2198 | 错
2199 | 锡
2200 | 锣
2201 | 锤
2202 | 锦
2203 | 键
2204 | 锯
2205 | 矮
2206 | 辞
2207 | 稠
2208 | 愁
2209 | 筹
2210 | 签
2211 | 简
2212 | 毁
2213 | 舅
2214 | 鼠
2215 | 催
2216 | 傻
2217 | 像
2218 | 躲
2219 | 微
2220 | 愈
2221 | 遥
2222 | 腰
2223 | 腥
2224 | 腹
2225 | 腾
2226 | 腿
2227 | 触
2228 | 解
2229 | 酱
2230 | 痰
2231 | 廉
2232 | 新
2233 | 韵
2234 | 意
2235 | 粮
2236 | 数
2237 | 煎
2238 | 塑
2239 | 慈
2240 | 煤
2241 | 煌
2242 | 满
2243 | 漠
2244 | 源
2245 | 滤
2246 | 滥
2247 | 滔
2248 | 溪
2249 | 溜
2250 | 滚
2251 | 滨
2252 | 粱
2253 | 滩
2254 | 慎
2255 | 誉
2256 | 塞
2257 | 谨
2258 | 福
2259 | 群
2260 | 殿
2261 | 辟
2262 | 障
2263 | 嫌
2264 | 嫁
2265 | 叠
2266 | 缝
2267 | 缠
2268 | 静
2269 | 碧
2270 | 璃
2271 | 墙
2272 | 撇
2273 | 嘉
2274 | 摧
2275 | 截
2276 | 誓
2277 | 境
2278 | 摘
2279 | 摔
2280 | 聚
2281 | 蔽
2282 | 慕
2283 | 暮
2284 | 蔑
2285 | 模
2286 | 榴
2287 | 榜
2288 | 榨
2289 | 歌
2290 | 遭
2291 | 酷
2292 | 酿
2293 | 酸
2294 | 磁
2295 | 愿
2296 | 需
2297 | 弊
2298 | 裳
2299 | 颗
2300 | 嗽
2301 | 蜻
2302 | 蜡
2303 | 蝇
2304 | 蜘
2305 | 赚
2306 | 锹
2307 | 锻
2308 | 舞
2309 | 稳
2310 | 算
2311 | 箩
2312 | 管
2313 | 僚
2314 | 鼻
2315 | 魄
2316 | 貌
2317 | 膜
2318 | 膊
2319 | 膀
2320 | 鲜
2321 | 疑
2322 | 馒
2323 | 裹
2324 | 敲
2325 | 豪
2326 | 膏
2327 | 遮
2328 | 腐
2329 | 瘦
2330 | 辣
2331 | 竭
2332 | 端
2333 | 旗
2334 | 精
2335 | 歉
2336 | 熄
2337 | 熔
2338 | 漆
2339 | 漂
2340 | 漫
2341 | 滴
2342 | 演
2343 | 漏
2344 | 慢
2345 | 寨
2346 | 赛
2347 | 察
2348 | 蜜
2349 | 谱
2350 | 嫩
2351 | 翠
2352 | 熊
2353 | 凳
2354 | 骡
2355 | 缩
2356 | 慧
2357 | 撕
2358 | 撒
2359 | 趣
2360 | 趟
2361 | 撑
2362 | 播
2363 | 撞
2364 | 撤
2365 | 增
2366 | 聪
2367 | 鞋
2368 | 蕉
2369 | 蔬
2370 | 横
2371 | 槽
2372 | 樱
2373 | 橡
2374 | 飘
2375 | 醋
2376 | 醉
2377 | 震
2378 | 霉
2379 | 瞒
2380 | 题
2381 | 暴
2382 | 瞎
2383 | 影
2384 | 踢
2385 | 踏
2386 | 踩
2387 | 踪
2388 | 蝶
2389 | 蝴
2390 | 嘱
2391 | 墨
2392 | 镇
2393 | 靠
2394 | 稻
2395 | 黎
2396 | 稿
2397 | 稼
2398 | 箱
2399 | 箭
2400 | 篇
2401 | 僵
2402 | 躺
2403 | 僻
2404 | 德
2405 | 艘
2406 | 膝
2407 | 膛
2408 | 熟
2409 | 摩
2410 | 颜
2411 | 毅
2412 | 糊
2413 | 遵
2414 | 潜
2415 | 潮
2416 | 懂
2417 | 额
2418 | 慰
2419 | 劈
2420 | 操
2421 | 燕
2422 | 薯
2423 | 薪
2424 | 薄
2425 | 颠
2426 | 橘
2427 | 整
2428 | 融
2429 | 醒
2430 | 餐
2431 | 嘴
2432 | 蹄
2433 | 器
2434 | 赠
2435 | 默
2436 | 镜
2437 | 赞
2438 | 篮
2439 | 邀
2440 | 衡
2441 | 膨
2442 | 雕
2443 | 磨
2444 | 凝
2445 | 辨
2446 | 辩
2447 | 糖
2448 | 糕
2449 | 燃
2450 | 澡
2451 | 激
2452 | 懒
2453 | 壁
2454 | 避
2455 | 缴
2456 | 戴
2457 | 擦
2458 | 鞠
2459 | 藏
2460 | 霜
2461 | 霞
2462 | 瞧
2463 | 蹈
2464 | 螺
2465 | 穗
2466 | 繁
2467 | 辫
2468 | 赢
2469 | 糟
2470 | 糠
2471 | 燥
2472 | 臂
2473 | 翼
2474 | 骤
2475 | 鞭
2476 | 覆
2477 | 蹦
2478 | 镰
2479 | 翻
2480 | 鹰
2481 | 警
2482 | 攀
2483 | 蹲
2484 | 颤
2485 | 瓣
2486 | 爆
2487 | 疆
2488 | 壤
2489 | 耀
2490 | 躁
2491 | 嚼
2492 | 嚷
2493 | 籍
2494 | 魔
2495 | 灌
2496 | 蠢
2497 | 霸
2498 | 露
2499 | 囊
2500 | 罐
2501 | 匕
2502 | 刁
2503 | 丐
2504 | 歹
2505 | 戈
2506 | 夭
2507 | 仑
2508 | 讥
2509 | 冗
2510 | 邓
2511 | 艾
2512 | 夯
2513 | 凸
2514 | 卢
2515 | 叭
2516 | 叽
2517 | 皿
2518 | 凹
2519 | 囚
2520 | 矢
2521 | 乍
2522 | 尔
2523 | 冯
2524 | 玄
2525 | 邦
2526 | 迂
2527 | 邢
2528 | 芋
2529 | 芍
2530 | 吏
2531 | 夷
2532 | 吁
2533 | 吕
2534 | 吆
2535 | 屹
2536 | 廷
2537 | 迄
2538 | 臼
2539 | 仲
2540 | 伦
2541 | 伊
2542 | 肋
2543 | 旭
2544 | 匈
2545 | 凫
2546 | 妆
2547 | 亥
2548 | 汛
2549 | 讳
2550 | 讶
2551 | 讹
2552 | 讼
2553 | 诀
2554 | 弛
2555 | 阱
2556 | 驮
2557 | 驯
2558 | 纫
2559 | 玖
2560 | 玛
2561 | 韧
2562 | 抠
2563 | 扼
2564 | 汞
2565 | 扳
2566 | 抡
2567 | 坎
2568 | 坞
2569 | 抑
2570 | 拟
2571 | 抒
2572 | 芙
2573 | 芜
2574 | 苇
2575 | 芥
2576 | 芯
2577 | 芭
2578 | 杖
2579 | 杉
2580 | 巫
2581 | 杈
2582 | 甫
2583 | 匣
2584 | 轩
2585 | 卤
2586 | 肖
2587 | 吱
2588 | 吠
2589 | 呕
2590 | 呐
2591 | 吟
2592 | 呛
2593 | 吻
2594 | 吭
2595 | 邑
2596 | 囤
2597 | 吮
2598 | 岖
2599 | 牡
2600 | 佑
2601 | 佃
2602 | 伺
2603 | 囱
2604 | 肛
2605 | 肘
2606 | 甸
2607 | 狈
2608 | 鸠
2609 | 彤
2610 | 灸
2611 | 刨
2612 | 庇
2613 | 吝
2614 | 庐
2615 | 闰
2616 | 兑
2617 | 灼
2618 | 沐
2619 | 沛
2620 | 汰
2621 | 沥
2622 | 沦
2623 | 汹
2624 | 沧
2625 | 沪
2626 | 忱
2627 | 诅
2628 | 诈
2629 | 罕
2630 | 屁
2631 | 坠
2632 | 妓
2633 | 姊
2634 | 妒
2635 | 纬
2636 | 玫
2637 | 卦
2638 | 坷
2639 | 坯
2640 | 拓
2641 | 坪
2642 | 坤
2643 | 拄
2644 | 拧
2645 | 拂
2646 | 拙
2647 | 拇
2648 | 拗
2649 | 茉
2650 | 昔
2651 | 苛
2652 | 苫
2653 | 苟
2654 | 苞
2655 | 茁
2656 | 苔
2657 | 枉
2658 | 枢
2659 | 枚
2660 | 枫
2661 | 杭
2662 | 郁
2663 | 矾
2664 | 奈
2665 | 奄
2666 | 殴
2667 | 歧
2668 | 卓
2669 | 昙
2670 | 哎
2671 | 咕
2672 | 呵
2673 | 咙
2674 | 呻
2675 | 咒
2676 | 咆
2677 | 咖
2678 | 帕
2679 | 账
2680 | 贬
2681 | 贮
2682 | 氛
2683 | 秉
2684 | 岳
2685 | 侠
2686 | 侥
2687 | 侣
2688 | 侈
2689 | 卑
2690 | 刽
2691 | 刹
2692 | 肴
2693 | 觅
2694 | 忿
2695 | 瓮
2696 | 肮
2697 | 肪
2698 | 狞
2699 | 庞
2700 | 疟
2701 | 疙
2702 | 疚
2703 | 卒
2704 | 氓
2705 | 炬
2706 | 沽
2707 | 沮
2708 | 泣
2709 | 泞
2710 | 泌
2711 | 沼
2712 | 怔
2713 | 怯
2714 | 宠
2715 | 宛
2716 | 衩
2717 | 祈
2718 | 诡
2719 | 帚
2720 | 屉
2721 | 弧
2722 | 弥
2723 | 陋
2724 | 陌
2725 | 函
2726 | 姆
2727 | 虱
2728 | 叁
2729 | 绅
2730 | 驹
2731 | 绊
2732 | 绎
2733 | 契
2734 | 贰
2735 | 玷
2736 | 玲
2737 | 珊
2738 | 拭
2739 | 拷
2740 | 拱
2741 | 挟
2742 | 垢
2743 | 垛
2744 | 拯
2745 | 荆
2746 | 茸
2747 | 茬
2748 | 荚
2749 | 茵
2750 | 茴
2751 | 荞
2752 | 荠
2753 | 荤
2754 | 荧
2755 | 荔
2756 | 栈
2757 | 柑
2758 | 栅
2759 | 柠
2760 | 枷
2761 | 勃
2762 | 柬
2763 | 砂
2764 | 泵
2765 | 砚
2766 | 鸥
2767 | 轴
2768 | 韭
2769 | 虐
2770 | 昧
2771 | 盹
2772 | 咧
2773 | 昵
2774 | 昭
2775 | 盅
2776 | 勋
2777 | 哆
2778 | 咪
2779 | 哟
2780 | 幽
2781 | 钙
2782 | 钝
2783 | 钠
2784 | 钦
2785 | 钧
2786 | 钮
2787 | 毡
2788 | 氢
2789 | 秕
2790 | 俏
2791 | 俄
2792 | 俐
2793 | 侯
2794 | 徊
2795 | 衍
2796 | 胚
2797 | 胧
2798 | 胎
2799 | 狰
2800 | 饵
2801 | 峦
2802 | 奕
2803 | 咨
2804 | 飒
2805 | 闺
2806 | 闽
2807 | 籽
2808 | 娄
2809 | 烁
2810 | 炫
2811 | 洼
2812 | 柒
2813 | 涎
2814 | 洛
2815 | 恃
2816 | 恍
2817 | 恬
2818 | 恤
2819 | 宦
2820 | 诫
2821 | 诬
2822 | 祠
2823 | 诲
2824 | 屏
2825 | 屎
2826 | 逊
2827 | 陨
2828 | 姚
2829 | 娜
2830 | 蚤
2831 | 骇
2832 | 耘
2833 | 耙
2834 | 秦
2835 | 匿
2836 | 埂
2837 | 捂
2838 | 捍
2839 | 袁
2840 | 捌
2841 | 挫
2842 | 挚
2843 | 捣
2844 | 捅
2845 | 埃
2846 | 耿
2847 | 聂
2848 | 荸
2849 | 莽
2850 | 莱
2851 | 莉
2852 | 莹
2853 | 莺
2854 | 梆
2855 | 栖
2856 | 桦
2857 | 栓
2858 | 桅
2859 | 桩
2860 | 贾
2861 | 酌
2862 | 砸
2863 | 砰
2864 | 砾
2865 | 殉
2866 | 逞
2867 | 哮
2868 | 唠
2869 | 哺
2870 | 剔
2871 | 蚌
2872 | 蚜
2873 | 畔
2874 | 蚣
2875 | 蚪
2876 | 蚓
2877 | 哩
2878 | 圃
2879 | 鸯
2880 | 唁
2881 | 哼
2882 | 唆
2883 | 峭
2884 | 唧
2885 | 峻
2886 | 赂
2887 | 赃
2888 | 钾
2889 | 铆
2890 | 氨
2891 | 秫
2892 | 笆
2893 | 俺
2894 | 赁
2895 | 倔
2896 | 殷
2897 | 耸
2898 | 舀
2899 | 豺
2900 | 豹
2901 | 颁
2902 | 胯
2903 | 胰
2904 | 脐
2905 | 脓
2906 | 逛
2907 | 卿
2908 | 鸵
2909 | 鸳
2910 | 馁
2911 | 凌
2912 | 凄
2913 | 衷
2914 | 郭
2915 | 斋
2916 | 疹
2917 | 紊
2918 | 瓷
2919 | 羔
2920 | 烙
2921 | 浦
2922 | 涡
2923 | 涣
2924 | 涤
2925 | 涧
2926 | 涕
2927 | 涩
2928 | 悍
2929 | 悯
2930 | 窍
2931 | 诺
2932 | 诽
2933 | 袒
2934 | 谆
2935 | 祟
2936 | 恕
2937 | 娩
2938 | 骏
2939 | 琐
2940 | 麸
2941 | 琉
2942 | 琅
2943 | 措
2944 | 捺
2945 | 捶
2946 | 赦
2947 | 埠
2948 | 捻
2949 | 掐
2950 | 掂
2951 | 掖
2952 | 掷
2953 | 掸
2954 | 掺
2955 | 勘
2956 | 聊
2957 | 娶
2958 | 菱
2959 | 菲
2960 | 萎
2961 | 菩
2962 | 萤
2963 | 乾
2964 | 萧
2965 | 萨
2966 | 菇
2967 | 彬
2968 | 梗
2969 | 梧
2970 | 梭
2971 | 曹
2972 | 酝
2973 | 酗
2974 | 厢
2975 | 硅
2976 | 硕
2977 | 奢
2978 | 盔
2979 | 匾
2980 | 颅
2981 | 彪
2982 | 眶
2983 | 晤
2984 | 曼
2985 | 晦
2986 | 冕
2987 | 啡
2988 | 畦
2989 | 趾
2990 | 啃
2991 | 蛆
2992 | 蚯
2993 | 蛉
2994 | 蛀
2995 | 唬
2996 | 啰
2997 | 唾
2998 | 啤
2999 | 啥
3000 | 啸
3001 | 崎
3002 | 逻
3003 | 崔
3004 | 崩
3005 | 婴
3006 | 赊
3007 | 铐
3008 | 铛
3009 | 铝
3010 | 铡
3011 | 铣
3012 | 铭
3013 | 矫
3014 | 秸
3015 | 秽
3016 | 笙
3017 | 笤
3018 | 偎
3019 | 傀
3020 | 躯
3021 | 兜
3022 | 衅
3023 | 徘
3024 | 徙
3025 | 舶
3026 | 舷
3027 | 舵
3028 | 敛
3029 | 翎
3030 | 脯
3031 | 逸
3032 | 凰
3033 | 猖
3034 | 祭
3035 | 烹
3036 | 庶
3037 | 庵
3038 | 痊
3039 | 阎
3040 | 阐
3041 | 眷
3042 | 焊
3043 | 焕
3044 | 鸿
3045 | 涯
3046 | 淑
3047 | 淌
3048 | 淮
3049 | 淆
3050 | 渊
3051 | 淫
3052 | 淳
3053 | 淤
3054 | 淀
3055 | 涮
3056 | 涵
3057 | 惦
3058 | 悴
3059 | 惋
3060 | 寂
3061 | 窒
3062 | 谍
3063 | 谐
3064 | 裆
3065 | 袱
3066 | 祷
3067 | 谒
3068 | 谓
3069 | 谚
3070 | 尉
3071 | 堕
3072 | 隅
3073 | 婉
3074 | 颇
3075 | 绰
3076 | 绷
3077 | 综
3078 | 绽
3079 | 缀
3080 | 巢
3081 | 琳
3082 | 琢
3083 | 琼
3084 | 揍
3085 | 堰
3086 | 揩
3087 | 揽
3088 | 揖
3089 | 彭
3090 | 揣
3091 | 搀
3092 | 搓
3093 | 壹
3094 | 搔
3095 | 葫
3096 | 募
3097 | 蒋
3098 | 蒂
3099 | 韩
3100 | 棱
3101 | 椰
3102 | 焚
3103 | 椎
3104 | 棺
3105 | 榔
3106 | 椭
3107 | 粟
3108 | 棘
3109 | 酣
3110 | 酥
3111 | 硝
3112 | 硫
3113 | 颊
3114 | 雳
3115 | 翘
3116 | 凿
3117 | 棠
3118 | 晰
3119 | 鼎
3120 | 喳
3121 | 遏
3122 | 晾
3123 | 畴
3124 | 跋
3125 | 跛
3126 | 蛔
3127 | 蜒
3128 | 蛤
3129 | 鹃
3130 | 喻
3131 | 啼
3132 | 喧
3133 | 嵌
3134 | 赋
3135 | 赎
3136 | 赐
3137 | 锉
3138 | 锌
3139 | 甥
3140 | 掰
3141 | 氮
3142 | 氯
3143 | 黍
3144 | 筏
3145 | 牍
3146 | 粤
3147 | 逾
3148 | 腌
3149 | 腋
3150 | 腕
3151 | 猩
3152 | 猬
3153 | 惫
3154 | 敦
3155 | 痘
3156 | 痢
3157 | 痪
3158 | 竣
3159 | 翔
3160 | 奠
3161 | 遂
3162 | 焙
3163 | 滞
3164 | 湘
3165 | 渤
3166 | 渺
3167 | 溃
3168 | 溅
3169 | 湃
3170 | 愕
3171 | 惶
3172 | 寓
3173 | 窖
3174 | 窘
3175 | 雇
3176 | 谤
3177 | 犀
3178 | 隘
3179 | 媒
3180 | 媚
3181 | 婿
3182 | 缅
3183 | 缆
3184 | 缔
3185 | 缕
3186 | 骚
3187 | 瑟
3188 | 鹉
3189 | 瑰
3190 | 搪
3191 | 聘
3192 | 斟
3193 | 靴
3194 | 靶
3195 | 蓖
3196 | 蒿
3197 | 蒲
3198 | 蓉
3199 | 楔
3200 | 椿
3201 | 楷
3202 | 榄
3203 | 楞
3204 | 楣
3205 | 酪
3206 | 碘
3207 | 硼
3208 | 碉
3209 | 辐
3210 | 辑
3211 | 频
3212 | 睹
3213 | 睦
3214 | 瞄
3215 | 嗜
3216 | 嗦
3217 | 暇
3218 | 畸
3219 | 跷
3220 | 跺
3221 | 蜈
3222 | 蜗
3223 | 蜕
3224 | 蛹
3225 | 嗅
3226 | 嗡
3227 | 嗤
3228 | 署
3229 | 蜀
3230 | 幌
3231 | 锚
3232 | 锥
3233 | 锨
3234 | 锭
3235 | 锰
3236 | 稚
3237 | 颓
3238 | 筷
3239 | 魁
3240 | 衙
3241 | 腻
3242 | 腮
3243 | 腺
3244 | 鹏
3245 | 肄
3246 | 猿
3247 | 颖
3248 | 煞
3249 | 雏
3250 | 馍
3251 | 馏
3252 | 禀
3253 | 痹
3254 | 廓
3255 | 痴
3256 | 靖
3257 | 誊
3258 | 漓
3259 | 溢
3260 | 溯
3261 | 溶
3262 | 滓
3263 | 溺
3264 | 寞
3265 | 窥
3266 | 窟
3267 | 寝
3268 | 褂
3269 | 裸
3270 | 谬
3271 | 媳
3272 | 嫉
3273 | 缚
3274 | 缤
3275 | 剿
3276 | 赘
3277 | 熬
3278 | 赫
3279 | 蔫
3280 | 摹
3281 | 蔓
3282 | 蔗
3283 | 蔼
3284 | 熙
3285 | 蔚
3286 | 兢
3287 | 榛
3288 | 榕
3289 | 酵
3290 | 碟
3291 | 碴
3292 | 碱
3293 | 碳
3294 | 辕
3295 | 辖
3296 | 雌
3297 | 墅
3298 | 嘁
3299 | 踊
3300 | 蝉
3301 | 嘀
3302 | 幔
3303 | 镀
3304 | 舔
3305 | 熏
3306 | 箍
3307 | 箕
3308 | 箫
3309 | 舆
3310 | 僧
3311 | 孵
3312 | 瘩
3313 | 瘟
3314 | 彰
3315 | 粹
3316 | 漱
3317 | 漩
3318 | 漾
3319 | 慷
3320 | 寡
3321 | 寥
3322 | 谭
3323 | 褐
3324 | 褪
3325 | 隧
3326 | 嫡
3327 | 缨
3328 | 撵
3329 | 撩
3330 | 撮
3331 | 撬
3332 | 擒
3333 | 墩
3334 | 撰
3335 | 鞍
3336 | 蕊
3337 | 蕴
3338 | 樊
3339 | 樟
3340 | 橄
3341 | 敷
3342 | 豌
3343 | 醇
3344 | 磕
3345 | 磅
3346 | 碾
3347 | 憋
3348 | 嘶
3349 | 嘲
3350 | 嘹
3351 | 蝠
3352 | 蝎
3353 | 蝌
3354 | 蝗
3355 | 蝙
3356 | 嘿
3357 | 幢
3358 | 镊
3359 | 镐
3360 | 稽
3361 | 篓
3362 | 膘
3363 | 鲤
3364 | 鲫
3365 | 褒
3366 | 瘪
3367 | 瘤
3368 | 瘫
3369 | 凛
3370 | 澎
3371 | 潭
3372 | 潦
3373 | 澳
3374 | 潘
3375 | 澈
3376 | 澜
3377 | 澄
3378 | 憔
3379 | 懊
3380 | 憎
3381 | 翩
3382 | 褥
3383 | 谴
3384 | 鹤
3385 | 憨
3386 | 履
3387 | 嬉
3388 | 豫
3389 | 缭
3390 | 撼
3391 | 擂
3392 | 擅
3393 | 蕾
3394 | 薛
3395 | 薇
3396 | 擎
3397 | 翰
3398 | 噩
3399 | 橱
3400 | 橙
3401 | 瓢
3402 | 蟥
3403 | 霍
3404 | 霎
3405 | 辙
3406 | 冀
3407 | 踱
3408 | 蹂
3409 | 蟆
3410 | 螃
3411 | 螟
3412 | 噪
3413 | 鹦
3414 | 黔
3415 | 穆
3416 | 篡
3417 | 篷
3418 | 篙
3419 | 篱
3420 | 儒
3421 | 膳
3422 | 鲸
3423 | 瘾
3424 | 瘸
3425 | 糙
3426 | 燎
3427 | 濒
3428 | 憾
3429 | 懈
3430 | 窿
3431 | 缰
3432 | 壕
3433 | 藐
3434 | 檬
3435 | 檐
3436 | 檩
3437 | 檀
3438 | 礁
3439 | 磷
3440 | 瞭
3441 | 瞬
3442 | 瞳
3443 | 瞪
3444 | 曙
3445 | 蹋
3446 | 蟋
3447 | 蟀
3448 | 嚎
3449 | 赡
3450 | 镣
3451 | 魏
3452 | 簇
3453 | 儡
3454 | 徽
3455 | 爵
3456 | 朦
3457 | 臊
3458 | 鳄
3459 | 糜
3460 | 癌
3461 | 懦
3462 | 豁
3463 | 臀
3464 | 藕
3465 | 藤
3466 | 瞻
3467 | 嚣
3468 | 鳍
3469 | 癞
3470 | 瀑
3471 | 襟
3472 | 璧
3473 | 戳
3474 | 攒
3475 | 孽
3476 | 蘑
3477 | 藻
3478 | 鳖
3479 | 蹭
3480 | 蹬
3481 | 簸
3482 | 簿
3483 | 蟹
3484 | 靡
3485 | 癣
3486 | 羹
3487 | 鬓
3488 | 攘
3489 | 蠕
3490 | 巍
3491 | 鳞
3492 | 糯
3493 | 譬
3494 | 霹
3495 | 躏
3496 | 髓
3497 | 蘸
3498 | 镶
3499 | 瓤
3500 | 矗
3501 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Solutions of autochecker for chinese
  3 | 
  4 | ### How to use :
  5 | - run in the terminal : python Autochecker4Chinese.py
  6 | - You will get the following result : ![](./result.png)
  7 | 
  8 | 
### 1. Make a detector
 10 | 
- Construct a dict to detect misspelled Chinese phrases: the key is a Chinese phrase, and the value is its frequency in the corpus.
- You can build this dict by collecting a corpus from the internet, or take the easier route and load a dict that someone else has already created. Here we choose the second way and construct the dict from a file.
- The detector works as follows: any phrase that does not appear in this dict is flagged as a misspelled phrase.
 14 | 
 15 | 
 16 | 
 17 | ```python
 18 | def construct_dict( file_path ):
 19 |     
 20 |     word_freq = {}
 21 |     with open(file_path, "r") as f:
 22 |         for line in f:
 23 |             info = line.split()
 24 |             word = info[0]
 25 |             frequency = info[1]
 26 |             word_freq[word] = frequency
 27 |     
 28 |     return word_freq
 29 | ```
 30 | 
 31 | 
 32 | ```python
# Path of the phrase/frequency file that sits next to this readme in the repository.
FILE_PATH = "./token_freq_pos%40350k_jieba.txt"
# Phrase -> frequency lookup; the detector treats a phrase missing from it as misspelled.
phrase_freq = construct_dict( FILE_PATH )
 35 | ```
 36 | 
 37 | 
 38 | ```python
# Sanity check: show the container type and how many distinct phrases were loaded.
print( type(phrase_freq) )
print( len(phrase_freq) )
 41 | ```
 42 | 
 43 |     <type 'dict'>
 44 |     349045
 45 | 
 46 | 
### 2. Make an autocorrector
- To correct a misspelled phrase, we use edit distance to build a list of candidate corrections.
- We sort the candidate list by the likelihood of each candidate being the correct phrase, based on the following rules:
	- If the candidate's pinyin exactly matches the misspelled phrase's pinyin, we put the candidate in the first order, which means it is among the most likely phrases to be selected.
	- Else if the pinyin of the candidate's first word matches the pinyin of the misspelled phrase's first word, we put the candidate in the second order.
	- Otherwise, we put the candidate in the third order.
 53 | 
 54 | ```python
 55 | import pinyin
 56 | ```
 57 | 
 58 | 
 59 | ```python
 60 | # list for chinese words
 61 | # read from the words.dic
 62 | def load_cn_words_dict( file_path ):
 63 |     cn_words_dict = ""
 64 |     with open(file_path, "r") as f:
 65 |         for word in f:
 66 |             cn_words_dict += word.strip().decode("utf-8")
 67 |     return cn_words_dict
 68 | ```
 69 | 
 70 | 
 71 | ```python
 72 | # function calculate the edite distance from the chinese phrase 
 73 | def edits1(phrase, cn_words_dict):
 74 |     "All edits that are one edit away from `phrase`."
 75 |     phrase = phrase.decode("utf-8")
 76 |     splits     = [(phrase[:i], phrase[i:])  for i in range(len(phrase) + 1)]
 77 |     deletes    = [L + R[1:]                 for L, R in splits if R]
 78 |     transposes = [L + R[1] + R[0] + R[2:]   for L, R in splits if len(R)>1]
 79 |     replaces   = [L + c + R[1:]             for L, R in splits if R for c in cn_words_dict]
 80 |     inserts    = [L + c + R                 for L, R in splits for c in cn_words_dict]
 81 |     return set(deletes + transposes + replaces + inserts)
 82 | ```
 83 | 
 84 | 
 85 | ```python
 86 | # return the phrease exist in phrase_freq
 87 | def known(phrases): return set(phrase for phrase in phrases if phrase.encode("utf-8") in phrase_freq)
 88 | ```
 89 | 
 90 | 
 91 | ```python
 92 | # get the candidates phrase of the error phrase
 93 | # we sort the candidates phrase's importance according to their pinyin
 94 | # if the candidate phrase's pinyin exactly matches with the error phrase, we put them into first order
 95 | # if the candidate phrase's first word pinyin matches with the error phrase first word, we put them into second order
 96 | # else we put candidate phrase into the third order
 97 | def get_candidates( error_phrase ):
 98 |     
 99 |     candidates_1st_order = []
100 |     candidates_2nd_order = []
101 |     candidates_3nd_order = []
102 |     
103 |     error_pinyin = pinyin.get(error_phrase, format="strip", delimiter="/").encode("utf-8")
104 |     cn_words_dict = load_cn_words_dict( "./cn_dict.txt" )
105 |     candidate_phrases = list( known(edits1(error_phrase, cn_words_dict)) )
106 |     
107 |     for candidate_phrase in candidate_phrases:
108 |         candidate_pinyin = pinyin.get(candidate_phrase, format="strip", delimiter="/").encode("utf-8")
109 |         if candidate_pinyin == error_pinyin:
110 |             candidates_1st_order.append(candidate_phrase)
111 |         elif candidate_pinyin.split("/")[0] == error_pinyin.split("/")[0]:
112 |             candidates_2nd_order.append(candidate_phrase)
113 |         else:
114 |             candidates_3nd_order.append(candidate_phrase)
115 |     
116 |     return candidates_1st_order, candidates_2nd_order, candidates_3nd_order
117 | ```
118 | 
119 | 
120 | ```python
def _phrase_frequency( phrase ):
    """Return the frequency recorded for `phrase` in phrase_freq as an int (0 if none)."""
    # phrase_freq is probed with the UTF-8 encoded phrase, the same way the
    # `known` filter probes it; fall back to the phrase as given.
    key = phrase if isinstance(phrase, bytes) else phrase.encode("utf-8")
    frequency = phrase_freq.get(key)
    if frequency is None:
        frequency = phrase_freq.get(phrase, 0)
    try:
        # Frequencies may be stored as text; compare them numerically.
        return int(frequency)
    except (TypeError, ValueError):
        return 0


def auto_correct( error_phrase ):
    """Return the most likely correction of `error_phrase`.

    Candidates come from get_candidates in three tiers of decreasing pinyin
    similarity. The first non-empty tier is used, and within it the candidate
    with the highest frequency is returned.

    Args:
        error_phrase: the misspelled phrase, as passed to get_candidates.

    Returns:
        The chosen candidate, or `error_phrase` itself (decoded to text when
        it was given as bytes) if no candidate exists at all.
    """
    c1_order, c2_order, c3_order = get_candidates(error_phrase)
    for candidates in (c1_order, c2_order, c3_order):
        if candidates:
            return max(candidates, key=_phrase_frequency)

    # No candidate in any tier: leave the phrase as it is instead of letting
    # max() raise on an empty list.
    if isinstance(error_phrase, bytes):
        return error_phrase.decode("utf-8")
    return error_phrase
131 | ```
132 | 
133 | 
134 | ```python
135 | # test for the auto_correct 
136 | error_phrase_1 = "呕涂" # should be "呕吐"
137 | error_phrase_2 = "东方之朱" # should be "东方之珠"
138 | error_phrase_3 = "沙拢" # should be "沙龙"
139 | 
140 | print error_phrase_1, auto_correct( error_phrase_1 )
141 | print error_phrase_2, auto_correct( error_phrase_2 )
142 | print error_phrase_3, auto_correct( error_phrase_3 )
143 | ```
144 | 
145 |     呕涂 呕吐
146 |     东方之朱 东方之珠
147 |     沙拢 沙龙
148 | 
149 | 
### 3. Correct the misspelled phrases in a sentence
151 | 
152 | 
153 | 
- For any given sentence, use jieba to do the segmentation.
- Once segmentation is done, take the resulting segment list and check whether each remaining (non-punctuation) phrase exists in the phrase_freq dict; if it does not, it is a misspelled phrase.
- Use the auto_correct function to correct the misspelled phrase.
- Output the corrected sentence.
158 | 
159 | 
160 | 
161 | ```python
162 | import jieba
163 | import string
164 | import re
165 | ```
166 | 
167 | 
168 | ```python
# Characters that are never spell-checked: the ASCII punctuation from the
# `string` module followed by a set of full-width punctuation marks.
PUNCTUATION_LIST = string.punctuation
PUNCTUATION_LIST += "。，？：；｛｝［］‘“”《》／！％……（）"
171 | ```
172 | 
173 | 
174 | ```python
def auto_correct_sentence( error_sentence, verbose=True):
    """Segment a sentence with jieba and correct every misspelled phrase in it.

    A segment is treated as misspelled when it is not punctuation and its
    UTF-8 encoded form is not a key of phrase_freq; such a segment is replaced
    by the result of auto_correct.

    Args:
        error_sentence: the sentence to correct, as UTF-8 encoded bytes or as
            already decoded text.
        verbose: when true, print every (misspelled, corrected) pair and the
            final corrected sentence.

    Returns:
        The corrected sentence.
    """
    if isinstance(error_sentence, bytes):
        error_sentence = error_sentence.decode("utf-8")
    # Decode the punctuation table once instead of once per segment.
    punctuation = PUNCTUATION_LIST
    if isinstance(punctuation, bytes):
        punctuation = punctuation.decode("utf-8")

    jieba_cut = jieba.cut(error_sentence, cut_all=False)
    seg_list = "\t".join(jieba_cut).split("\t")

    correct_sentence = ""

    for phrase in seg_list:

        correct_phrase = phrase
        # Substring test: the segment counts as punctuation if it occurs
        # anywhere inside the punctuation table.
        if phrase not in punctuation:
            # A phrase missing from the dict is a misspelled phrase. Test
            # membership on the dict itself rather than on a list of its keys.
            if phrase.encode("utf-8") not in phrase_freq:
                correct_phrase = auto_correct(phrase.encode("utf-8"))
                if verbose :
                    print("%s %s" % (phrase, correct_phrase))

        correct_sentence += correct_phrase

    if verbose:
        print(correct_sentence)
    return correct_sentence
199 | 
200 | 
201 | ```python
# Example sentence containing misspelled phrases; verbose is left at its
# default, so each correction and the final sentence are printed.
err_sent = '机七学习是人工智能领遇最能体现智能的一个分知！'
correct_sent = auto_correct_sentence( err_sent )
204 | ```
205 | 
206 |     机七 机器
207 |     领遇 领域
208 |     分知 分枝
209 |     机器学习是人工智能领域最能体现智能的一个分枝！
210 | 
211 | 
212 | 
213 | ```python
# The corrected sentence is also available as the function's return value.
print correct_sent
215 | ```
216 | 
217 |     机器学习是人工智能领域最能体现智能的一个分枝！
218 | 
219 | 
220 | 
221 | ```python
222 | 
223 | ```
224 | 


--------------------------------------------------------------------------------
/readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beyondacm/Autochecker4Chinese/ca27a2aed69b79cdc639fc088b0a2f942c2a81f5/readme.pdf


--------------------------------------------------------------------------------
/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beyondacm/Autochecker4Chinese/ca27a2aed69b79cdc639fc088b0a2f942c2a81f5/result.png


--------------------------------------------------------------------------------
/token_pinyin%4040k_sogou.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beyondacm/Autochecker4Chinese/ca27a2aed69b79cdc639fc088b0a2f942c2a81f5/token_pinyin%4040k_sogou.txt


--------------------------------------------------------------------------------