├── .gitignore
├── README.md
├── egs
│   └── aishell
│       ├── cmd.sh
│       ├── conf
│       │   └── fbank.conf
│       ├── local
│       │   ├── aishell_data_prep.sh
│       │   └── score.sh
│       ├── path.sh
│       ├── run.sh
│       ├── steps
│       └── utils
├── requirements.txt
├── src
│   ├── __init__.py
│   ├── bin
│   │   ├── example.sh
│   │   ├── recognize.py
│   │   └── train.py
│   ├── data
│   │   ├── __init__.py
│   │   └── data.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── attention.py
│   │   ├── decoder.py
│   │   ├── encoder.py
│   │   └── seq2seq.py
│   ├── solver
│   │   ├── __init__.py
│   │   └── solver.py
│   └── utils
│       ├── __init__.py
│       ├── data2json.sh
│       ├── dump.sh
│       ├── filt.py
│       ├── json2trn.py
│       ├── mergejson.py
│       ├── scp2json.py
│       ├── text2token.py
│       └── utils.py
├── test
│   ├── data
│   │   ├── data.json
│   │   └── train_nodup_sp_units.txt
│   ├── learn_pytorch.py
│   ├── learn_visdom.py
│   ├── path.sh
│   ├── test_attention.py
│   ├── test_data.py
│   ├── test_decoder.py
│   ├── test_encoder.py
│   └── test_seq2seq.py
└── tools
    ├── Makefile
    └── kaldi-io-for-python.tar.gz

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
3 | src/utils/kaldi_io.py
4 | 
5 | tools/kaldi-io-for-python/
6 | tools/kaldi
7 | 
8 | egs/*/data
9 | egs/*/dump
10 | egs/*/fbank
11 | egs/*/exp
12 | 
13 | .nfs*
14 | .vscode
15 | __pycache__
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Listen, Attend and Spell
2 | A PyTorch implementation of Listen, Attend and Spell (LAS) [1], an end-to-end automatic speech recognition framework that directly converts acoustic features to a character sequence using a single neural network.
3 | 
4 | ## Install
5 | - Python3 (Recommend Anaconda)
6 | - PyTorch 0.4.1+
7 | - [Kaldi](https://github.com/kaldi-asr/kaldi) (Just for feature extraction)
8 | - `pip install -r requirements.txt`
9 | - `cd tools; make KALDI=/path/to/kaldi`
10 | - If you want to run `egs/aishell/run.sh`, download the [aishell](http://www.openslr.org/33/) dataset for free.
11 | 
12 | ## Usage
13 | 1. `$ cd egs/aishell` and modify the aishell data path in `run.sh` to your own path.
14 | 2. `$ bash run.sh`, that's all!
15 | 
16 | You can change hyper-parameters with `$ bash run.sh --parameter_name parameter_value`, e.g., `$ bash run.sh --stage 3`. See the parameter names in `egs/aishell/run.sh` before `. utils/parse_options.sh`.
17 | ### More detail
18 | ```bash
19 | $ cd egs/aishell/
20 | $ . ./path.sh
21 | ```
22 | Train
23 | ```bash
24 | $ train.py -h
25 | ```
26 | Decode
27 | ```bash
28 | $ recognize.py -h
29 | ```
30 | ### Workflow
31 | Workflow of `egs/aishell/run.sh`:
32 | - Stage 0: Data Preparation
33 | - Stage 1: Feature Generation
34 | - Stage 2: Dictionary and Json Data Preparation
35 | - Stage 3: Network Training
36 | - Stage 4: Decoding
37 | ### Visualize loss
38 | If you want to visualize your loss, you can use `visdom` to do that:
39 | - Open a new terminal in your remote server (recommend tmux) and run `$ visdom`.
40 | - Open a new terminal and run `$ bash run.sh --visdom 1 --visdom_id "<any-string>"` or `$ train.py ... --visdom 1 --visdom_id "<any-string>"`.
41 | - Open your browser and type `<your-server-ip>:8097`, e.g., `127.0.0.1:8097`.
42 | - In the visdom website, choose `<any-string>` in `Environment` to see your loss.
43 | 
44 | ## Results
45 | | Model | CER | Config |
46 | | :---: | :-: | :----: |
47 | | LSTMP | 9.85 | 4x(1024-512) |
48 | | Listen, Attend and Spell | 13.2 | See egs/aishell/run.sh |
49 | 
50 | ## Reference
51 | [1] W. Chan, N. Jaitly, Q. Le, and O. Vinyals, “Listen, attend and spell: A neural network for large vocabulary conversational speech recognition,” in ICASSP 2016. (https://arxiv.org/abs/1508.01211v2)
52 | 
--------------------------------------------------------------------------------
/egs/aishell/cmd.sh:
--------------------------------------------------------------------------------
1 | # you can change cmd.sh depending on what type of queue you are using.
2 | # If you have no queueing system and want to run on a local machine, you
3 | # can change all instances 'queue.pl' to run.pl (but be careful and run
4 | # commands one by one: most recipes will exhaust the memory on your
5 | # machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
6 | # with slurm.  Different queues are configured differently, with different
7 | # queue names and different ways of specifying things like memory;
8 | # to account for these differences you can create and edit the file
9 | # conf/queue.conf to match your queue's configuration.  Search for
10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
12 | 
13 | export train_cmd="run.pl --mem 2G"
14 | export cuda_cmd="run.pl --mem 2G --gpu 1"
15 | export decode_cmd="run.pl --mem 4G"
16 | 
17 | # NHU setup
18 | # export train_cmd="queue.pl -q all.q --mem 2G"
19 | # export cuda_cmd="/home/work_nfs/common/tools/pyqueue_asr.pl --mem 2G --gpu 1"
20 | # export decode_cmd="/home/work_nfs/common/tools/pyqueue_asr.pl --mem 4G --gpu 1"
21 | #export cuda_cmd="queue.pl --mem 2G --gpu 1 --config conf/gpu.conf"
22 | #export decode_cmd="queue.pl -q all.q --mem 4G"
23 | 
--------------------------------------------------------------------------------
/egs/aishell/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --sample-frequency=16000
2 | --num-mel-bins=80
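3 | # Note: run.sh sets do_delta=true, so dump.sh later appends deltas and
4 | # delta-deltas to these 80-dim fbank features; that is where the 240-dim
5 | # encoder input (einput=240 in run.sh) comes from.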
--------------------------------------------------------------------------------
/egs/aishell/local/aishell_data_prep.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2017 Xingyu Na
4 | # Apache 2.0
5 | 
6 | . ./path.sh || exit 1;
7 | 
8 | if [ $# != 2 ]; then
9 |   echo "Usage: $0 <audio-path> <text-path>"
10 |   echo " $0 /export/a05/xna/data/data_aishell/wav /export/a05/xna/data/data_aishell/transcript"
11 |   exit 1;
12 | fi
13 | 
14 | aishell_audio_dir=$1
15 | aishell_text=$2/aishell_transcript_v0.8.txt
16 | 
17 | train_dir=data/local/train
18 | dev_dir=data/local/dev
19 | test_dir=data/local/test
20 | tmp_dir=data/local/tmp
21 | 
22 | mkdir -p $train_dir
23 | mkdir -p $dev_dir
24 | mkdir -p $test_dir
25 | mkdir -p $tmp_dir
26 | 
27 | # data directory check
28 | if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then
29 |   echo "Error: $0 requires two directory arguments"
30 |   exit 1;
31 | fi
32 | 
33 | # find wav audio files for train, dev and test resp.
34 | find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist
35 | n=`cat $tmp_dir/wav.flist | wc -l`
36 | [ $n -ne 141925 ] && \
37 |   echo Warning: expected 141925 data files, found $n
38 | 
39 | grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1;
40 | grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1;
41 | grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1;
42 | 
43 | rm -r $tmp_dir
44 | 
45 | # Transcriptions preparation
46 | for dir in $train_dir $dev_dir $test_dir; do
47 |   echo Preparing $dir transcriptions
48 |   sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list
49 |   sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' > $dir/utt2spk_all
50 |   paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all
51 |   utils/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt
52 |   awk '{print $1}' $dir/transcripts.txt > $dir/utt.list
53 |   utils/filter_scp.pl -f 1 $dir/utt.list $dir/utt2spk_all | sort -u > $dir/utt2spk
54 |   utils/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp
55 |   sort -u $dir/transcripts.txt > $dir/text
56 |   utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt
57 | done
58 | 
59 | mkdir -p data/train data/dev data/test
60 | 
61 | for f in spk2utt utt2spk wav.scp text; do
62 |   cp $train_dir/$f data/train/$f || exit 1;
63 |   cp $dev_dir/$f data/dev/$f || exit 1;
64 |   cp $test_dir/$f data/test/$f || exit 1;
65 | done
66 | 
67 | echo "$0: AISHELL data preparation succeeded"
68 | exit 0;
69 | 
--------------------------------------------------------------------------------
/egs/aishell/local/score.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | [ -f path.sh ] && . ./path.sh
4 | 
5 | nlsyms=""
6 | 
7 | . utils/parse_options.sh
8 | 
9 | if [ $# != 2 ]; then
10 |   echo "Usage: $0 <decode-dir> <dict>"
11 |   exit 1
12 | fi
13 | 
14 | dir=$1
15 | dic=$2
16 | 
17 | json2trn.py ${dir}/data.json ${dic} ${dir}/ref.trn ${dir}/hyp.trn
18 | 
19 | if [ ! -z ${nlsyms} ]; then
20 |   cp ${dir}/ref.trn ${dir}/ref.trn.org
21 |   cp ${dir}/hyp.trn ${dir}/hyp.trn.org
22 |   filt.py -v $nlsyms ${dir}/ref.trn.org > ${dir}/ref.trn
23 |   filt.py -v $nlsyms ${dir}/hyp.trn.org > ${dir}/hyp.trn
24 | fi
25 | 
26 | sclite -r ${dir}/ref.trn trn -h ${dir}/hyp.trn trn -i rm -o all stdout > ${dir}/result.txt
27 | 
28 | echo "write a CER (or TER) result in ${dir}/result.txt"
29 | grep -e Avg -e SPKR -m 2 ${dir}/result.txt
30 | 
--------------------------------------------------------------------------------
/egs/aishell/path.sh:
--------------------------------------------------------------------------------
1 | MAIN_ROOT=$PWD/../..
2 | KALDI_ROOT=$MAIN_ROOT/tools/kaldi
3 | SRC_ROOT=$MAIN_ROOT/src
4 | 
5 | # BEGIN from kaldi path.sh
6 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
7 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
8 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
9 | . $KALDI_ROOT/tools/config/common_path.sh
10 | export LC_ALL=C
11 | # END
12 | 
13 | export PATH=/home/work_nfs/ktxu/tools/anaconda3/bin:$PATH
14 | export PATH=$SRC_ROOT/bin/:$SRC_ROOT/utils/:$PATH
15 | export PYTHONPATH=$SRC_ROOT/data/:$SRC_ROOT/models/:$SRC_ROOT/solver/:$SRC_ROOT/utils/:$PYTHONPATH
16 | 
--------------------------------------------------------------------------------
/egs/aishell/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # -- IMPORTANT
4 | data=/home/work_nfs/common/data # Modify to your aishell data path
5 | stage=-1  # Modify to control which stage to start from
6 | # --
7 | 
8 | ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
9 | nj=40
10 | 
11 | dumpdir=dump # directory to dump full features
12 | 
13 | # Feature configuration
14 | do_delta=true
15 | 
16 | # Network architecture
17 | # Encoder
18 | einput=240
19 | ehidden=256
20 | elayer=3
21 | edropout=0.2
22 | ebidirectional=1
23 | etype=lstm
24 | # Attention
25 | atype=dot
26 | # Decoder
27 | dembed=512
28 | dhidden=512
29 | dlayer=1
30 | 
31 | # Training config
32 | epochs=20
33 | half_lr=1
34 | early_stop=0
35 | max_norm=5
36 | batch_size=32
37 | maxlen_in=800
38 | maxlen_out=150
39 | # optimizer
40 | optimizer=adam
41 | lr=1e-3
42 | momentum=0
43 | l2=1e-5
44 | # logging and visualize
45 | checkpoint=0
46 | continue_from=""
47 | print_freq=10
48 | visdom=0
49 | visdom_id="LAS Training"
50 | 
51 | # Decode config
52 | beam_size=30
53 | nbest=1
54 | decode_max_len=100
55 | 
56 | # exp tag
57 | tag="" # tag for managing experiments.
58 | 
59 | . utils/parse_options.sh || exit 1;
60 | . ./cmd.sh
61 | . ./path.sh
62 | 
63 | if [ $stage -le 0 ]; then
64 |   echo "Stage 0: Data Preparation"
65 |   ### Task dependent. You have to do this data preparation yourself,
66 |   ### but you can utilize Kaldi recipes in most cases
67 |   # Generate wav.scp, text, utt2spk, spk2utt (segments)
68 |   local/aishell_data_prep.sh $data/data_aishell/wav $data/data_aishell/transcript || exit 1;
69 |   # remove space in text
70 |   for x in train test dev; do
71 |     cp data/${x}/text data/${x}/text.org
72 |     paste -d " " <(cut -f 1 -d" " data/${x}/text.org) <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \
73 |       > data/${x}/text
74 |   done
75 | fi
76 | 
77 | feat_train_dir=${dumpdir}/train/delta${do_delta}; mkdir -p ${feat_train_dir}
78 | feat_test_dir=${dumpdir}/test/delta${do_delta}; mkdir -p ${feat_test_dir}
79 | feat_dev_dir=${dumpdir}/dev/delta${do_delta}; mkdir -p ${feat_dev_dir}
80 | if [ $stage -le 1 ]; then
81 |   echo "Stage 1: Feature Generation"
82 |   ### Task dependent. You have to do this data preparation yourself,
83 |   ### but you can utilize Kaldi recipes in most cases
84 |   fbankdir=fbank
85 |   for data in train test dev; do
86 |     steps/make_fbank.sh --cmd "$train_cmd" --nj $nj --write_utt2num_frames true \
87 |       data/$data exp/make_fbank/$data $fbankdir/$data || exit 1;
88 |   done
89 |   # compute global CMVN
90 |   compute-cmvn-stats scp:data/train/feats.scp data/train/cmvn.ark
91 |   # dump features for training
92 |   for data in train test dev; do
93 |     feat_dir=`eval echo '$feat_'${data}'_dir'`
94 |     dump.sh --cmd "$train_cmd" --nj $nj --do_delta $do_delta \
95 |       data/$data/feats.scp data/train/cmvn.ark exp/dump_feats/$data $feat_dir
96 |   done
97 | fi
98 | 
99 | dict=data/lang_1char/train_chars.txt
100 | echo "dictionary: ${dict}"
101 | nlsyms=data/lang_1char/non_lang_syms.txt
102 | if [ $stage -le 2 ]; then
103 |   echo "Stage 2: Dictionary and Json Data Preparation"
104 |   ### Task dependent. You have to check non-linguistic symbols used in the corpus.
105 |   mkdir -p data/lang_1char/
106 | 
107 |   echo "make a non-linguistic symbol list"
108 |   # It's empty in AISHELL-1
109 |   cut -f 2- data/train/text | grep -o -P '\[.*?\]' | sort | uniq > ${nlsyms}
110 |   cat ${nlsyms}
111 | 
112 |   echo "make a dictionary"
113 |   echo "<unk> 0" > ${dict}
114 |   echo "<sos> 1" >> ${dict}
115 |   echo "<eos> 2" >> ${dict}
116 |   text2token.py -s 1 -n 1 -l ${nlsyms} data/train/text | cut -f 2- -d" " | tr " " "\n" \
117 |     | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+2}' >> ${dict}
118 |   wc -l ${dict}
119 | 
120 |   echo "make json files"
121 |   for data in train test dev; do
122 |     feat_dir=`eval echo '$feat_'${data}'_dir'`
123 |     data2json.sh --feat ${feat_dir}/feats.scp --nlsyms ${nlsyms} \
124 |       data/$data ${dict} > ${feat_dir}/data.json
125 |   done
126 | fi
127 | 
128 | if [ -z ${tag} ]; then
129 |   expdir=exp/train_in${einput}_hidden${ehidden}_e${elayer}_${etype}_drop${edropout}_${atype}_emb${dembed}_hidden${dhidden}_d${dlayer}_epoch${epochs}_norm${max_norm}_bs${batch_size}_mli${maxlen_in}_mlo${maxlen_out}_${optimizer}_lr${lr}_mmt${momentum}_l2${l2}
130 |   if ${do_delta}; then
131 |     expdir=${expdir}_delta
132 |   fi
133 | else
134 |   expdir=exp/train_${tag}
135 | fi
136 | mkdir -p ${expdir}
137 | 
138 | if [ ${stage} -le 3 ]; then
139 |   echo "Stage 3: Network Training"
140 |   ${cuda_cmd} --gpu ${ngpu} ${expdir}/train.log \
141 |     train.py \
142 |     --train_json ${feat_train_dir}/data.json \
143 |     --valid_json ${feat_dev_dir}/data.json \
144 |     --dict ${dict} \
145 |     --einput $einput \
146 |     --ehidden $ehidden \
147 |     --elayer $elayer \
148 |     --edropout $edropout \
149 |     --ebidirectional $ebidirectional \
150 |     --etype $etype \
151 |     --atype $atype \
152 |     --dembed $dembed \
153 |     --dhidden $dhidden \
154 |     --dlayer $dlayer \
155 |     --epochs $epochs \
156 |     --half_lr $half_lr \
157 |     --early_stop $early_stop \
158 |     --max_norm $max_norm \
159 |     --batch_size $batch_size \
160 |     --maxlen_in $maxlen_in \
161 |     --maxlen_out $maxlen_out \
162 |     --optimizer $optimizer \
163 |     --lr $lr \
164 |     --momentum $momentum \
165 |     --l2 $l2 \
166 |     --save_folder ${expdir} \
167 |     --checkpoint $checkpoint \
168 |     --continue_from "$continue_from" \
169 |     --print_freq ${print_freq} \
170 |     --visdom $visdom \
171 |     --visdom_id "$visdom_id"
172 | fi
173 | 
174 | if [ ${stage} -le 4 ]; then
175 |   echo "Stage 4: Decoding"
176 |   decode_dir=${expdir}/decode_test_beam${beam_size}_nbest${nbest}_ml${decode_max_len}
177 |   mkdir -p ${decode_dir}
178 |   ${cuda_cmd} --gpu ${ngpu} ${decode_dir}/decode.log \
179 |     recognize.py \
180 |     --recog_json ${feat_test_dir}/data.json \
181 |     --dict $dict \
182 |     --result_label ${decode_dir}/data.json \
183 |     --model_path ${expdir}/final.pth.tar \
184 |     --beam_size $beam_size \
185 |     --nbest $nbest \
186 |     --decode_max_len $decode_max_len
187 | 
188 |   # Compute CER
189 |   local/score.sh --nlsyms ${nlsyms} ${decode_dir} ${dict}
190 | fi
191 | 
--------------------------------------------------------------------------------
/egs/aishell/steps:
--------------------------------------------------------------------------------
1 | ../../tools/kaldi/egs/wsj/s5/steps
--------------------------------------------------------------------------------
/egs/aishell/utils:
--------------------------------------------------------------------------------
1 | ../../tools/kaldi/egs/wsj/s5/utils
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | visdom
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaituoxu/Listen-Attend-Spell/b43ce63eaf68252fae2056bfcbbdef18c4be2340/src/__init__.py
--------------------------------------------------------------------------------
/src/bin/example.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python train.py --train_json ../../test/data/data.json --valid_json ../../test/data/data.json --dict ../../test/data/train_nodup_sp_units.txt --einput 83 --print_freq 1 --checkpoint 1 --epochs 5 --batch_size 2 &
4 | 
5 | python recognize.py --recog_json ../../test/data/data.json --dict ../../test/data/train_nodup_sp_units.txt --result_label ./result.json --model_path exp/temp/final.pth.tar --beam_size 3 --nbest 2 > log &
6 | 
--------------------------------------------------------------------------------
/src/bin/recognize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import json
4 | 
5 | import torch
6 | 
7 | import kaldi_io
8 | from seq2seq import Seq2Seq
9 | from utils import add_results_to_json, process_dict
10 | 
11 | 
12 | parser = argparse.ArgumentParser(
13 |     "End-to-End Automatic Speech Recognition Decoding.")
14 | # data
15 | parser.add_argument('--recog_json', type=str, required=True,
16 |                     help='Filename of recognition data (json)')
17 | parser.add_argument('--dict', type=str, required=True,
18 |                     help='Dictionary which should include <unk> <sos> <eos>')
19 | parser.add_argument('--result_label', type=str, required=True,
20 |                     help='Filename of result label data (json)')
21 | # model
22 | parser.add_argument('--model_path', type=str, required=True,
23 |                     help='Path to model file created by training')
24 | # decode
25 | parser.add_argument('--beam_size', default=1, type=int,
26 |                     help='Beam size')
27 | parser.add_argument('--nbest', default=1, type=int,
28 |                     help='Nbest size')
29 | parser.add_argument('--decode_max_len', default=0, type=int,
30 |                     help='Max output length. If ==0 (default), it uses an '
31 |                     'end-detect function to automatically find maximum '
32 |                     'hypothesis lengths')
33 | 
34 | 
35 | def recognize(args):
36 |     model = Seq2Seq.load_model(args.model_path)
37 |     print(model)
38 |     model.eval()
39 |     model.cuda()
40 |     char_list, sos_id, eos_id = process_dict(args.dict)
41 |     assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id
42 | 
43 |     # read json data
44 |     with open(args.recog_json, 'rb') as f:
45 |         js = json.load(f)['utts']
46 | 
47 |     # decode each utterance
48 |     new_js = {}
49 |     with torch.no_grad():
50 |         for idx, name in enumerate(js.keys(), 1):
51 |             print('(%d/%d) decoding %s' %
52 |                   (idx, len(js.keys()), name), flush=True)
53 |             input = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
54 |             input = torch.from_numpy(input).float()
55 |             input_length = torch.tensor([input.size(0)], dtype=torch.int)
56 |             input = input.cuda()
57 |             input_length = input_length.cuda()
58 |             nbest_hyps = model.recognize(input, input_length, char_list, args)
59 |             new_js[name] = add_results_to_json(js[name], nbest_hyps, char_list)
60 | 
61 |     with open(args.result_label, 'wb') as f:
62 |         f.write(json.dumps({'utts': new_js}, indent=4,
63 |                            sort_keys=True).encode('utf_8'))
64 | 
65 | 
66 | if __name__ == "__main__":
67 |     args = parser.parse_args()
68 |     print(args, flush=True)
69 |     recognize(args)
70 | 
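Both `recognize.py` and `train.py` rely on `process_dict` from `src/utils/utils.py`, which this dump does not include. Below is a minimal sketch of what it presumably does, given the dict file written in Stage 2 of `run.sh` (one `<token> <id>` pair per line, starting with `<unk> 0`, `<sos> 1`, `<eos> 2`); the real implementation may differ:

```python
def process_dict(dict_path):
    """Read a '<token> <id>' dictionary file; return the token list plus the
    integer ids of <sos> and <eos> (ids are assumed to run 0..N-1 in order)."""
    with open(dict_path, 'rb') as f:
        dictionary = f.read().decode('utf-8').splitlines()
    char_list = [entry.split(' ')[0] for entry in dictionary]
    sos_id = char_list.index('<sos>')
    eos_id = char_list.index('<eos>')
    return char_list, sos_id, eos_id
```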
--------------------------------------------------------------------------------
/src/bin/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | 
4 | import torch
5 | 
6 | from data import AudioDataLoader, AudioDataset
7 | from decoder import Decoder
8 | from encoder import Encoder
9 | from seq2seq import Seq2Seq
10 | from solver import Solver
11 | from utils import process_dict
12 | 
13 | 
14 | parser = argparse.ArgumentParser(
15 |     "End-to-End Automatic Speech Recognition Training "
16 |     "(Listen Attend and Spell framework).")
17 | # General config
18 | # Task related
19 | parser.add_argument('--train_json', type=str, default=None,
20 |                     help='Filename of train label data (json)')
21 | parser.add_argument('--valid_json', type=str, default=None,
22 |                     help='Filename of validation label data (json)')
23 | parser.add_argument('--dict', type=str, required=True,
24 |                     help='Dictionary which should include <unk> <sos> <eos>')
25 | # Network architecture
26 | # encoder
27 | # TODO: automatically infer input dim
28 | parser.add_argument('--einput', default=80, type=int,
29 |                     help='Dim of encoder input')
30 | parser.add_argument('--ehidden', default=512, type=int,
31 |                     help='Size of encoder hidden units')
32 | parser.add_argument('--elayer', default=4, type=int,
33 |                     help='Number of encoder layers.')
34 | parser.add_argument('--edropout', default=0.0, type=float,
35 |                     help='Encoder dropout rate')
36 | parser.add_argument('--ebidirectional', default=1, type=int,
37 |                     help='Whether to use a bidirectional encoder')
38 | parser.add_argument('--etype', default='lstm', type=str,
39 |                     help='Type of encoder RNN')
40 | # attention
41 | parser.add_argument('--atype', default='dot', type=str,
42 |                     help='Type of attention (Only support Dot Product now)')
43 | # decoder
44 | parser.add_argument('--dembed', default=512, type=int,
45 |                     help='Size of decoder embedding')
46 | parser.add_argument('--dhidden', default=512*2, type=int,
47 |                     help='Size of decoder hidden units. Should be the encoder '
48 |                     'hidden size (times 2 if the encoder is bidirectional)')
49 | parser.add_argument('--dlayer', default=1, type=int,
50 |                     help='Number of decoder layers.')
51 | 
52 | # Training config
53 | parser.add_argument('--epochs', default=30, type=int,
54 |                     help='Number of maximum epochs')
55 | parser.add_argument('--half_lr', dest='half_lr', default=0, type=int,
56 |                     help='Halve the learning rate when improvement is small')
57 | parser.add_argument('--early_stop', dest='early_stop', default=0, type=int,
58 |                     help='Stop training early when the lr has been halved '
59 |                     'but improvement is still small')
60 | parser.add_argument('--max_norm', default=5, type=float,
61 |                     help='Gradient norm threshold to clip')
62 | # minibatch
63 | parser.add_argument('--batch_size', '-b', default=32, type=int,
64 |                     help='Batch size')
65 | parser.add_argument('--maxlen_in', default=800, type=int, metavar='ML',
66 |                     help='Batch size is reduced if the input sequence length > ML')
67 | parser.add_argument('--maxlen_out', default=150, type=int, metavar='ML',
68 |                     help='Batch size is reduced if the output sequence length > ML')
69 | parser.add_argument('--num_workers', default=4, type=int,
70 |                     help='Number of workers to generate minibatch')
71 | # optimizer
72 | parser.add_argument('--optimizer', default='adam', type=str,
73 |                     choices=['sgd', 'adam'],
74 |                     help='Optimizer (support sgd and adam now)')
75 | parser.add_argument('--lr', default=1e-3, type=float,
76 |                     help='Init learning rate')
77 | parser.add_argument('--momentum', default=0.0, type=float,
78 |                     help='Momentum for optimizer')
79 | parser.add_argument('--l2', default=0.0, type=float,
80 |                     help='weight decay (L2 penalty)')
81 | # save and load model
82 | parser.add_argument('--save_folder', default='exp/temp',
83 |                     help='Location to save epoch models')
84 | parser.add_argument('--checkpoint', dest='checkpoint', default=0, type=int,
85 |                     help='Enables checkpoint saving of model')
86 | parser.add_argument('--continue_from', default='',
87 |                     help='Continue from checkpoint model')
88 | parser.add_argument('--model_path', default='final.pth.tar',
89 |                     help='Location to save best validation model')
90 | # logging
91 | parser.add_argument('--print_freq', default=10, type=int,
92 |                     help='Frequency of printing training information')
93 | parser.add_argument('--visdom', dest='visdom', type=int, default=0,
94 |                     help='Turn on visdom graphing')
95 | parser.add_argument('--visdom_id', default='LAS training',
96 |                     help='Identifier for visdom run')
97 | 
98 | 
99 | def main(args):
100 |     # Construct Solver
101 |     # data
102 |     tr_dataset = AudioDataset(args.train_json, args.batch_size,
103 |                               args.maxlen_in, args.maxlen_out)
104 |     cv_dataset = AudioDataset(args.valid_json, args.batch_size,
105 |                               args.maxlen_in, args.maxlen_out)
106 |     tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
107 |                                 num_workers=args.num_workers)
108 |     cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
109 |                                 num_workers=args.num_workers)
110 |     # load dictionary and generate char_list, sos_id, eos_id
111 |     char_list, sos_id, eos_id = process_dict(args.dict)
112 |     vocab_size = len(char_list)
113 |     data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
114 |     # model
115 |     encoder = Encoder(args.einput, args.ehidden, args.elayer,
116 |                       dropout=args.edropout, bidirectional=args.ebidirectional,
117 |                       rnn_type=args.etype)
118 |     decoder = Decoder(vocab_size, args.dembed, sos_id,
119 |                       eos_id, args.dhidden, args.dlayer,
120 |                       bidirectional_encoder=args.ebidirectional)
121 |     model = Seq2Seq(encoder, decoder)
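    # With the run.sh defaults this assembles: 240-dim fbank+delta input ->
    # 3-layer BLSTM(256) encoder -> N x Ti x 512 outputs; a 1-layer LSTMCell
    # decoder with dhidden=512 (= 2*ehidden) dot-product-attends over them
    # at every output step.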
122 |     print(model)
123 |     model.cuda()
124 |     # optimizer
125 |     if args.optimizer == 'sgd':
126 |         optimizer = torch.optim.SGD(model.parameters(),
127 |                                     lr=args.lr,
128 |                                     momentum=args.momentum,
129 |                                     weight_decay=args.l2)
130 |     elif args.optimizer == 'adam':
131 |         optimizer = torch.optim.Adam(model.parameters(),
132 |                                      lr=args.lr,
133 |                                      weight_decay=args.l2)
134 |     else:
135 |         print("Unsupported optimizer")
136 |         return
137 | 
138 |     # solver
139 |     solver = Solver(data, model, optimizer, args)
140 |     solver.train()
141 | 
142 | 
143 | if __name__ == '__main__':
144 |     args = parser.parse_args()
145 |     print(args)
146 |     main(args)
147 | 
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaituoxu/Listen-Attend-Spell/b43ce63eaf68252fae2056bfcbbdef18c4be2340/src/data/__init__.py
--------------------------------------------------------------------------------
/src/data/data.py:
--------------------------------------------------------------------------------
1 | """
2 | Logic:
3 | 1. AudioDataLoader generates a minibatch from AudioDataset; the size of this
4 |    minibatch is AudioDataLoader's batch size. For now, we always set
5 |    AudioDataLoader's batch size to 1. The real minibatch size we care about is
6 |    set in AudioDataset's __init__(...). So actually, we generate the
7 |    information of one minibatch in AudioDataset.
8 | 2. After AudioDataLoader gets one minibatch from AudioDataset, it calls its
9 |    collate_fn(batch) to process this minibatch.
10 | """
11 | import json
12 | 
13 | import numpy as np
14 | import torch
15 | import torch.utils.data as data
16 | 
17 | import kaldi_io
18 | from utils import IGNORE_ID, pad_list
19 | 
20 | 
21 | class AudioDataset(data.Dataset):
22 |     """
23 |     TODO: this is a bit of a HACK; batch_size lives here for now.
24 |     Move batch_size to the dataloader later.
25 |     """
26 | 
27 |     def __init__(self, data_json_path, batch_size, max_length_in, max_length_out,
28 |                  num_batches=0):
29 |         # From: espnet/src/asr/asr_utils.py: make_batchset()
30 |         """
31 |         Args:
32 |             data: espnet/espnet json format file.
33 |             num_batches: for debug. only use num_batches minibatches, not all.
34 |         """
35 |         super(AudioDataset, self).__init__()
36 |         with open(data_json_path, 'rb') as f:
37 |             data = json.load(f)['utts']
38 |         # sort it by input lengths (long to short)
39 |         sorted_data = sorted(data.items(), key=lambda data: int(
40 |             data[1]['input'][0]['shape'][0]), reverse=True)
41 |         # change batchsize depending on the input and output length
42 |         minibatch = []
43 |         start = 0
44 |         while True:
45 |             ilen = int(sorted_data[start][1]['input'][0]['shape'][0])
46 |             olen = int(sorted_data[start][1]['output'][0]['shape'][0])
47 |             factor = max(int(ilen / max_length_in), int(olen / max_length_out))
48 |             # if ilen = 1000 and max_length_in = 800
49 |             # then b = batchsize / 2
50 |             # and max(1, .) avoids batchsize = 0
51 |             b = max(1, int(batch_size / (1 + factor)))
52 |             end = min(len(sorted_data), start + b)
53 |             minibatch.append(sorted_data[start:end])
54 |             if end == len(sorted_data):
55 |                 break
56 |             start = end
57 |         if num_batches > 0:
58 |             minibatch = minibatch[:num_batches]
59 |         self.minibatch = minibatch
60 | 
61 |     def __getitem__(self, index):
62 |         return self.minibatch[index]
63 | 
64 |     def __len__(self):
65 |         return len(self.minibatch)
66 | 
67 | 
68 | class AudioDataLoader(data.DataLoader):
69 |     """
70 |     NOTE: just use batchsize=1 here, so drop_last=True makes no sense here.
71 |     """
72 | 
73 |     def __init__(self, *args, **kwargs):
74 |         super(AudioDataLoader, self).__init__(*args, **kwargs)
75 |         self.collate_fn = _collate_fn
76 | 
77 | 
78 | # From: espnet/src/asr/asr_pytorch.py: CustomConverter:__call__
79 | def _collate_fn(batch):
80 |     """
81 |     Args:
82 |         batch: list, len(batch) = 1. See AudioDataset.__getitem__()
83 |     Returns:
84 |         xs_pad: N x Ti x D, torch.Tensor
85 |         ilens : N, torch.Tensor
86 |         ys_pad: N x To, torch.Tensor
87 |     """
88 |     # batch should be located in list
89 |     assert len(batch) == 1
90 |     batch = load_inputs_and_targets(batch[0])
91 |     xs, ys = batch
92 | 
93 |     # TODO: perform subsampling
94 | 
95 |     # get batch of lengths of input sequences
96 |     ilens = np.array([x.shape[0] for x in xs])
97 | 
98 |     # perform padding and convert to tensor
99 |     xs_pad = pad_list([torch.from_numpy(x).float() for x in xs], 0)
100 |     ilens = torch.from_numpy(ilens)
101 |     ys_pad = pad_list([torch.from_numpy(y).long() for y in ys], IGNORE_ID)
102 |     return xs_pad, ilens, ys_pad
103 | 
104 | 
105 | # ------------------------------ utils ------------------------------------
106 | def load_inputs_and_targets(batch):
107 |     # From: espnet/src/asr/asr_utils.py: load_inputs_and_targets
108 |     # load acoustic features and target sequence of token ids
109 |     # for b in batch:
110 |     #     print(b[1]['input'][0]['feat'])
111 |     xs = [kaldi_io.read_mat(b[1]['input'][0]['feat']) for b in batch]
112 |     ys = [b[1]['output'][0]['tokenid'].split() for b in batch]
113 | 
114 |     # get index of non-zero length samples
115 |     nonzero_idx = filter(lambda i: len(ys[i]) > 0, range(len(xs)))
116 |     # sort by input lengths
117 |     nonzero_sorted_idx = sorted(nonzero_idx, key=lambda i: -len(xs[i]))
118 |     if len(nonzero_sorted_idx) != len(xs):
119 |         print("warning: Target sequences include empty tokenid")
120 | 
121 |     # remove zero-length samples
122 |     xs = [xs[i] for i in nonzero_sorted_idx]
123 |     ys = [np.fromiter(map(int, ys[i]), dtype=np.int64)
124 |           for i in nonzero_sorted_idx]
125 | 
126 |     return xs, ys
127 | 
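`pad_list` is imported from `src/utils/utils.py`, which is not part of this dump. A hedged sketch of the padding helper implied by the two call sites above (pad a list of variable-length tensors into one `N x Tmax x ...` batch, with 0 for inputs and IGNORE_ID for targets); the actual implementation may differ:

```python
import torch

def pad_list(xs, pad_value):
    """Pad a list of tensors (each Ti x ...) into one (N x Tmax x ...) tensor."""
    n_batch = len(xs)
    max_len = max(x.size(0) for x in xs)
    pad = xs[0].new_full((n_batch, max_len) + xs[0].size()[1:], pad_value)
    for i, x in enumerate(xs):
        pad[i, :x.size(0)] = x  # copy the real frames; the tail keeps pad_value
    return pad
```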
12 | """ 13 | 14 | def __init__(self): 15 | super(DotProductAttention, self).__init__() 16 | # TODO: move this out of this class? 17 | # self.linear_out = nn.Linear(dim*2, dim) 18 | 19 | def forward(self, queries, values): 20 | """ 21 | Args: 22 | queries: N x To x H 23 | values : N x Ti x H 24 | 25 | Returns: 26 | output: N x To x H 27 | attention_distribution: N x To x Ti 28 | """ 29 | batch_size = queries.size(0) 30 | hidden_size = queries.size(2) 31 | input_lengths = values.size(1) 32 | # (N, To, H) * (N, H, Ti) -> (N, To, Ti) 33 | attention_scores = torch.bmm(queries, values.transpose(1, 2)) 34 | attention_distribution = F.softmax( 35 | attention_scores.view(-1, input_lengths), dim=1).view(batch_size, -1, input_lengths) 36 | # (N, To, Ti) * (N, Ti, H) -> (N, To, H) 37 | attention_output = torch.bmm(attention_distribution, values) 38 | # # concat -> (N, To, 2*H) 39 | # concated = torch.cat((attention_output, queries), dim=2) 40 | # # TODO: Move this out of this class? 41 | # # output -> (N, To, H) 42 | # output = torch.tanh(self.linear_out( 43 | # concated.view(-1, 2*hidden_size))).view(batch_size, -1, hidden_size) 44 | 45 | return attention_output, attention_distribution 46 | -------------------------------------------------------------------------------- /src/models/decoder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from attention import DotProductAttention 7 | from utils import IGNORE_ID, pad_list 8 | 9 | 10 | class Decoder(nn.Module): 11 | """ 12 | """ 13 | 14 | def __init__(self, vocab_size, embedding_dim, sos_id, eos_id, hidden_size, 15 | num_layers, bidirectional_encoder=True): 16 | super(Decoder, self).__init__() 17 | # Hyper parameters 18 | # embedding + output 19 | self.vocab_size = vocab_size 20 | self.embedding_dim = embedding_dim 21 | self.sos_id = sos_id # Start of Sentence 22 | self.eos_id = eos_id # End of Sentence 23 | # rnn 24 | self.hidden_size = hidden_size 25 | self.num_layers = num_layers 26 | self.bidirectional_encoder = bidirectional_encoder # useless now 27 | self.encoder_hidden_size = hidden_size # must be equal now 28 | # Components 29 | self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim) 30 | self.rnn = nn.ModuleList() 31 | self.rnn += [nn.LSTMCell(self.embedding_dim + 32 | self.encoder_hidden_size, self.hidden_size)] 33 | for l in range(1, self.num_layers): 34 | self.rnn += [nn.LSTMCell(self.hidden_size, self.hidden_size)] 35 | self.attention = DotProductAttention() 36 | self.mlp = nn.Sequential( 37 | nn.Linear(self.encoder_hidden_size + self.hidden_size, 38 | self.hidden_size), 39 | nn.Tanh(), 40 | nn.Linear(self.hidden_size, self.vocab_size)) 41 | 42 | def zero_state(self, encoder_padded_outputs, H=None): 43 | N = encoder_padded_outputs.size(0) 44 | H = self.hidden_size if H == None else H 45 | return encoder_padded_outputs.new_zeros(N, H) 46 | 47 | def forward(self, padded_input, encoder_padded_outputs): 48 | """ 49 | Args: 50 | padded_input: N x To 51 | # encoder_hidden: (num_layers * num_directions) x N x H 52 | encoder_padded_outputs: N x Ti x H 53 | 54 | Returns: 55 | """ 56 | # *********Get Input and Output 57 | # from espnet/Decoder.forward() 58 | # TODO: need to make more smart way 59 | ys = [y[y != IGNORE_ID] for y in padded_input] # parse padded ys 60 | # prepare input and output word sequences with sos/eos IDs 61 | eos = ys[0].new([self.eos_id]) 62 | sos = ys[0].new([self.sos_id]) 
63 |         ys_in = [torch.cat([sos, y], dim=0) for y in ys]
64 |         ys_out = [torch.cat([y, eos], dim=0) for y in ys]
65 |         # padding for ys with -1
66 |         # pys: utt x olen
67 |         ys_in_pad = pad_list(ys_in, self.eos_id)
68 |         ys_out_pad = pad_list(ys_out, IGNORE_ID)
69 |         # print("ys_in_pad", ys_in_pad.size())
70 |         assert ys_in_pad.size() == ys_out_pad.size()
71 |         batch_size = ys_in_pad.size(0)
72 |         output_length = ys_in_pad.size(1)
73 |         # max_length = ys_in_pad.size(1) - 1  # TODO: should this subtract 1 (sos)?
74 | 
75 |         # *********Init decoder rnn
76 |         h_list = [self.zero_state(encoder_padded_outputs)]
77 |         c_list = [self.zero_state(encoder_padded_outputs)]
78 |         for l in range(1, self.num_layers):
79 |             h_list.append(self.zero_state(encoder_padded_outputs))
80 |             c_list.append(self.zero_state(encoder_padded_outputs))
81 |         att_c = self.zero_state(encoder_padded_outputs,
82 |                                 H=encoder_padded_outputs.size(2))
83 |         y_all = []
84 | 
85 |         # **********LAS: 1. decoder rnn 2. attention 3. concatenate and MLP
86 |         embedded = self.embedding(ys_in_pad)
87 |         for t in range(output_length):
88 |             # step 1. decoder RNN: s_i = RNN(s_i−1,y_i−1,c_i−1)
89 |             rnn_input = torch.cat((embedded[:, t, :], att_c), dim=1)
90 |             h_list[0], c_list[0] = self.rnn[0](
91 |                 rnn_input, (h_list[0], c_list[0]))
92 |             for l in range(1, self.num_layers):
93 |                 h_list[l], c_list[l] = self.rnn[l](
94 |                     h_list[l-1], (h_list[l], c_list[l]))
95 |             rnn_output = h_list[-1]  # below unsqueeze: (N x H) -> (N x 1 x H)
96 |             # step 2. attention: c_i = AttentionContext(s_i,h)
97 |             att_c, att_w = self.attention(rnn_output.unsqueeze(dim=1),
98 |                                           encoder_padded_outputs)
99 |             att_c = att_c.squeeze(dim=1)
100 |             # step 3. concatenate s_i and c_i, and feed them to the MLP
101 |             mlp_input = torch.cat((rnn_output, att_c), dim=1)
102 |             predicted_y_t = self.mlp(mlp_input)
103 |             y_all.append(predicted_y_t)
104 | 
105 |         y_all = torch.stack(y_all, dim=1)  # N x To x C
106 |         # **********Cross Entropy Loss
107 |         # F.cross_entropy = NLL(log_softmax(input), target))
108 |         y_all = y_all.view(batch_size * output_length, self.vocab_size)
109 |         ce_loss = F.cross_entropy(y_all, ys_out_pad.view(-1),
110 |                                   ignore_index=IGNORE_ID,
111 |                                   reduction='elementwise_mean')
112 |         # TODO: should we subtract 1 here?
113 |         # ce_loss *= (np.mean([len(y) for y in ys_in]) - 1)
114 |         # print("ys_in\n", ys_in)
115 |         # temp = [len(x) for x in ys_in]
116 |         # print(temp)
117 |         # print(np.mean(temp) - 1)
118 |         return ce_loss
119 | 
120 |         # *********step decode
121 |         # decoder_outputs = []
122 |         # sequence_symbols = []
123 |         # lengths = np.array([max_length] * batch_size)
124 | 
125 |         # def decode(step, step_output, step_attn):
126 |         #     # step_output is log_softmax()
127 |         #     decoder_outputs.append(step_output)
128 |         #     symbols = decoder_outputs[-1].topk(1)[1]
129 |         #     sequence_symbols.append(symbols)
130 |         #     #
131 |         #     eos_batches = symbols.data.eq(self.eos_id)
132 |         #     if eos_batches.dim() > 0:
133 |         #         eos_batches = eos_batches.cpu().view(-1).numpy()
134 |         #         update_idx = ((step < lengths) & eos_batches) != 0
135 |         #         lengths[update_idx] = len(sequence_symbols)
136 |         #     return symbols
137 | 
138 |         # # *********Run each component
139 |         # decoder_input = ys_in_pad
140 |         # embedded = self.embedding(decoder_input)
141 |         # rnn_output, decoder_hidden = self.rnn(embedded)  # use zero state
142 |         # output, attn = self.attention(rnn_output, encoder_padded_outputs)
143 |         # output = output.contiguous().view(-1, self.hidden_size)
144 |         # predicted_softmax = F.log_softmax(self.out(output), dim=1).view(
145 |         #     batch_size, output_length, -1)
146 |         # for t in range(predicted_softmax.size(1)):
147 |         #     step_output = predicted_softmax[:, t, :]
148 |         #     step_attn = attn[:, t, :]
149 |         #     decode(t, step_output, step_attn)
150 | 
151 |     def recognize_beam(self, encoder_outputs, char_list, args):
152 |         """Beam search, decode one utterance now.
153 |         Args:
154 |             encoder_outputs: T x H
155 |             char_list: list of characters
156 |             args: args.beam
157 | 
158 |         Returns:
159 |             nbest_hyps:
160 |         """
161 |         # search params
162 |         beam = args.beam_size
163 |         nbest = args.nbest
164 |         if args.decode_max_len == 0:
165 |             maxlen = encoder_outputs.size(0)
166 |         else:
167 |             maxlen = args.decode_max_len
168 | 
169 |         # *********Init decoder rnn
170 |         h_list = [self.zero_state(encoder_outputs.unsqueeze(0))]
171 |         c_list = [self.zero_state(encoder_outputs.unsqueeze(0))]
172 |         for l in range(1, self.num_layers):
173 |             h_list.append(self.zero_state(encoder_outputs.unsqueeze(0)))
174 |             c_list.append(self.zero_state(encoder_outputs.unsqueeze(0)))
175 |         att_c = self.zero_state(encoder_outputs.unsqueeze(0),
176 |                                 H=encoder_outputs.unsqueeze(0).size(2))
177 |         # prepare sos
178 |         y = self.sos_id
179 |         vy = encoder_outputs.new_zeros(1).long()
180 | 
181 |         hyp = {'score': 0.0, 'yseq': [y], 'c_prev': c_list, 'h_prev': h_list,
182 |                'a_prev': att_c}
183 |         hyps = [hyp]
184 |         ended_hyps = []
185 | 
186 |         for i in range(maxlen):
187 |             hyps_best_kept = []
188 |             for hyp in hyps:
189 |                 # vy.unsqueeze(1)
190 |                 vy[0] = hyp['yseq'][i]
191 |                 embedded = self.embedding(vy)
192 |                 # embedded.unsqueeze(0)
193 |                 # step 1. decoder RNN: s_i = RNN(s_i−1,y_i−1,c_i−1)
194 |                 rnn_input = torch.cat((embedded, hyp['a_prev']), dim=1)
195 |                 h_list[0], c_list[0] = self.rnn[0](
196 |                     rnn_input, (hyp['h_prev'][0], hyp['c_prev'][0]))
197 |                 for l in range(1, self.num_layers):
198 |                     h_list[l], c_list[l] = self.rnn[l](
199 |                         h_list[l-1], (hyp['h_prev'][l], hyp['c_prev'][l]))
200 |                 rnn_output = h_list[-1]
201 |                 # step 2. attention: c_i = AttentionContext(s_i,h)
202 |                 # below unsqueeze: (N x H) -> (N x 1 x H)
203 |                 att_c, att_w = self.attention(rnn_output.unsqueeze(dim=1),
204 |                                               encoder_outputs.unsqueeze(0))
205 |                 att_c = att_c.squeeze(dim=1)
206 |                 # step 3. concatenate s_i and c_i, and feed them to the MLP
207 |                 mlp_input = torch.cat((rnn_output, att_c), dim=1)
208 |                 predicted_y_t = self.mlp(mlp_input)
209 |                 local_scores = F.log_softmax(predicted_y_t, dim=1)
210 |                 # topk scores
211 |                 local_best_scores, local_best_ids = torch.topk(
212 |                     local_scores, beam, dim=1)
213 | 
214 |                 for j in range(beam):
215 |                     new_hyp = {}
216 |                     new_hyp['h_prev'] = h_list[:]
217 |                     new_hyp['c_prev'] = c_list[:]
218 |                     new_hyp['a_prev'] = att_c[:]
219 |                     new_hyp['score'] = hyp['score'] + local_best_scores[0, j]
220 |                     new_hyp['yseq'] = [0] * (1 + len(hyp['yseq']))
221 |                     new_hyp['yseq'][:len(hyp['yseq'])] = hyp['yseq']
222 |                     new_hyp['yseq'][len(hyp['yseq'])] = int(
223 |                         local_best_ids[0, j])
224 |                     # will be (2 x beam) hyps at most
225 |                     hyps_best_kept.append(new_hyp)
226 | 
227 |                 hyps_best_kept = sorted(hyps_best_kept,
228 |                                         key=lambda x: x['score'],
229 |                                         reverse=True)[:beam]
230 |             # end for hyp in hyps
231 |             hyps = hyps_best_kept
232 | 
233 |             # add eos in the final loop to guarantee there are ended hyps
234 |             if i == maxlen - 1:
235 |                 for hyp in hyps:
236 |                     hyp['yseq'].append(self.eos_id)
237 | 
238 |             # add ended hypotheses to a final list, and remove them from current hypotheses
239 |             # (this can be a problem: the number of hyps < beam)
240 |             remained_hyps = []
241 |             for hyp in hyps:
242 |                 if hyp['yseq'][-1] == self.eos_id:
243 |                     # hyp['score'] += (i + 1) * penalty
244 |                     ended_hyps.append(hyp)
245 |                 else:
246 |                     remained_hyps.append(hyp)
247 | 
248 |             hyps = remained_hyps
249 |             if len(hyps) > 0:
250 |                 print('remaining hypotheses: ' + str(len(hyps)))
251 |             else:
252 |                 print('no hypothesis. Finish decoding.')
253 |                 break
254 | 
255 |             for hyp in hyps:
256 |                 print('hypo: ' + ''.join([char_list[int(x)]
257 |                                           for x in hyp['yseq'][1:]]))
258 |         # end for i in range(maxlen)
259 |         nbest_hyps = sorted(ended_hyps, key=lambda x: x['score'], reverse=True)[
260 |             :min(len(ended_hyps), nbest)]
261 |         return nbest_hyps
262 | 
--------------------------------------------------------------------------------
/src/models/encoder.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
3 | 
4 | 
5 | class Encoder(nn.Module):
6 |     r"""Applies a multi-layer LSTM to a variable-length input sequence.
7 | """ 8 | 9 | def __init__(self, input_size, hidden_size, num_layers, 10 | dropout=0.0, bidirectional=True, rnn_type='lstm'): 11 | super(Encoder, self).__init__() 12 | self.input_size = input_size 13 | self.hidden_size = hidden_size 14 | self.num_layers = num_layers 15 | self.bidirectional = bidirectional 16 | self.rnn_type = rnn_type 17 | self.dropout = dropout 18 | if self.rnn_type == 'lstm': 19 | self.rnn = nn.LSTM(input_size, hidden_size, num_layers, 20 | batch_first=True, 21 | dropout=dropout, 22 | bidirectional=bidirectional) 23 | 24 | def forward(self, padded_input, input_lengths): 25 | """ 26 | Args: 27 | padded_input: N x T x D 28 | input_lengths: N 29 | 30 | Returns: output, hidden 31 | - **output**: N x T x H 32 | - **hidden**: (num_layers * num_directions) x N x H 33 | """ 34 | # Add total_length for supportting nn.DataParallel() later 35 | # see https://pytorch.org/docs/stable/notes/faq.html#pack-rnn-unpack-with-data-parallelism 36 | total_length = padded_input.size(1) # get the max sequence length 37 | packed_input = pack_padded_sequence(padded_input, input_lengths, 38 | batch_first=True) 39 | packed_output, hidden = self.rnn(packed_input) 40 | output, _ = pad_packed_sequence(packed_output, 41 | batch_first=True, 42 | total_length=total_length) 43 | return output, hidden 44 | 45 | def flatten_parameters(self): 46 | self.rnn.flatten_parameters() 47 | -------------------------------------------------------------------------------- /src/models/seq2seq.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from decoder import Decoder 5 | from encoder import Encoder 6 | 7 | 8 | class Seq2Seq(nn.Module): 9 | """Sequence-to-Sequence architecture with configurable encoder and decoder. 10 | """ 11 | 12 | def __init__(self, encoder, decoder): 13 | super(Seq2Seq, self).__init__() 14 | self.encoder = encoder 15 | self.decoder = decoder 16 | 17 | def forward(self, padded_input, input_lengths, padded_target): 18 | """ 19 | Args: 20 | padded_input: N x Ti x D 21 | input_lengths: N 22 | padded_targets: N x To 23 | """ 24 | encoder_padded_outputs, _ = self.encoder(padded_input, input_lengths) 25 | loss = self.decoder(padded_target, encoder_padded_outputs) 26 | return loss 27 | 28 | def recognize(self, input, input_length, char_list, args): 29 | """Sequence-to-Sequence beam search, decode one utterence now. 
--------------------------------------------------------------------------------
/src/models/seq2seq.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from decoder import Decoder
5 | from encoder import Encoder
6 | 
7 | 
8 | class Seq2Seq(nn.Module):
9 |     """Sequence-to-Sequence architecture with configurable encoder and decoder.
10 |     """
11 | 
12 |     def __init__(self, encoder, decoder):
13 |         super(Seq2Seq, self).__init__()
14 |         self.encoder = encoder
15 |         self.decoder = decoder
16 | 
17 |     def forward(self, padded_input, input_lengths, padded_target):
18 |         """
19 |         Args:
20 |             padded_input: N x Ti x D
21 |             input_lengths: N
22 |             padded_targets: N x To
23 |         """
24 |         encoder_padded_outputs, _ = self.encoder(padded_input, input_lengths)
25 |         loss = self.decoder(padded_target, encoder_padded_outputs)
26 |         return loss
27 | 
28 |     def recognize(self, input, input_length, char_list, args):
29 |         """Sequence-to-Sequence beam search, decode one utterance now.
30 |         Args:
31 |             input: T x D
32 |             char_list: list of characters
33 |             args: args.beam
34 | 
35 |         Returns:
36 |             nbest_hyps:
37 |         """
38 |         encoder_outputs, _ = self.encoder(input.unsqueeze(0), input_length)
39 |         nbest_hyps = self.decoder.recognize_beam(encoder_outputs[0],
40 |                                                  char_list,
41 |                                                  args)
42 |         return nbest_hyps
43 | 
44 |     @classmethod
45 |     def load_model(cls, path):
46 |         # Load to CPU
47 |         package = torch.load(path, map_location=lambda storage, loc: storage)
48 |         model = cls.load_model_from_package(package)
49 |         return model
50 | 
51 |     @classmethod
52 |     def load_model_from_package(cls, package):
53 |         encoder = Encoder(package['einput'],
54 |                           package['ehidden'],
55 |                           package['elayer'],
56 |                           dropout=package['edropout'],
57 |                           bidirectional=package['ebidirectional'],
58 |                           rnn_type=package['etype'])
59 |         decoder = Decoder(package['dvocab_size'],
60 |                           package['dembed'],
61 |                           package['dsos_id'],
62 |                           package['deos_id'],
63 |                           package['dhidden'],
64 |                           package['dlayer'],
65 |                           bidirectional_encoder=package['ebidirectional']
66 |                           )
67 |         encoder.flatten_parameters()
68 |         model = cls(encoder, decoder)
69 |         model.load_state_dict(package['state_dict'])
70 |         return model
71 | 
72 |     @staticmethod
73 |     def serialize(model, optimizer, epoch, tr_loss=None, cv_loss=None):
74 |         package = {
75 |             # encoder
76 |             'einput': model.encoder.input_size,
77 |             'ehidden': model.encoder.hidden_size,
78 |             'elayer': model.encoder.num_layers,
79 |             'edropout': model.encoder.dropout,
80 |             'ebidirectional': model.encoder.bidirectional,
81 |             'etype': model.encoder.rnn_type,
82 |             # decoder
83 |             'dvocab_size': model.decoder.vocab_size,
84 |             'dembed': model.decoder.embedding_dim,
85 |             'dsos_id': model.decoder.sos_id,
86 |             'deos_id': model.decoder.eos_id,
87 |             'dhidden': model.decoder.hidden_size,
88 |             'dlayer': model.decoder.num_layers,
89 |             # state
90 |             'state_dict': model.state_dict(),
91 |             'optim_dict': optimizer.state_dict(),
92 |             'epoch': epoch
93 |         }
94 |         if tr_loss is not None:
95 |             package['tr_loss'] = tr_loss
96 |             package['cv_loss'] = cv_loss
97 |         return package
98 | 
--------------------------------------------------------------------------------
/src/solver/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaituoxu/Listen-Attend-Spell/b43ce63eaf68252fae2056bfcbbdef18c4be2340/src/solver/__init__.py
--------------------------------------------------------------------------------
/src/solver/solver.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | 
4 | import torch
5 | 
6 | 
7 | class Solver(object):
8 |     """
9 |     """
10 | 
11 |     def __init__(self, data, model, optimizer, args):
12 |         self.tr_loader = data['tr_loader']
13 |         self.cv_loader = data['cv_loader']
14 |         self.model = model
15 |         self.optimizer = optimizer
16 | 
17 |         # Training config
18 |         self.epochs = args.epochs
19 |         self.half_lr = args.half_lr
20 |         self.early_stop = args.early_stop
21 |         self.max_norm = args.max_norm
22 |         # save and load model
23 |         self.save_folder = args.save_folder
24 |         self.checkpoint = args.checkpoint
25 |         self.continue_from = args.continue_from
26 |         self.model_path = args.model_path
27 |         # logging
28 |         self.print_freq = args.print_freq
29 |         # visualizing loss using visdom
30 |         self.tr_loss = torch.Tensor(self.epochs)
31 |         self.cv_loss = torch.Tensor(self.epochs)
32 |         self.visdom = args.visdom
33 |         self.visdom_id = args.visdom_id
34 |         if self.visdom:
35 |             from visdom import Visdom
36 |             self.vis = Visdom(env=self.visdom_id)
37 |             self.vis_opts = dict(title=self.visdom_id,
38 |                                  ylabel='Loss', xlabel='Epoch',
39 |                                  legend=['train loss', 'cv loss'])
40 |             self.vis_window = None
41 |             self.vis_epochs = torch.arange(1, self.epochs + 1)
42 | 
43 |         self._reset()
44 | 
45 |     def _reset(self):
46 |         # Reset
47 |         if self.continue_from:
48 |             print('Loading checkpoint model %s' % self.continue_from)
49 |             package = torch.load(self.continue_from)
50 |             self.model.load_state_dict(package['state_dict'])
51 |             self.optimizer.load_state_dict(package['optim_dict'])
52 |             self.start_epoch = int(package.get('epoch', 1))
53 |             self.tr_loss[:self.start_epoch] = package['tr_loss'][:self.start_epoch]
54 |             self.cv_loss[:self.start_epoch] = package['cv_loss'][:self.start_epoch]
55 |         else:
56 |             self.start_epoch = 0
57 |         # Create save folder
58 |         os.makedirs(self.save_folder, exist_ok=True)
59 |         self.prev_val_loss = float("inf")
60 |         self.best_val_loss = float("inf")
61 |         self.halving = False
62 | 
63 |     def train(self):
64 |         # Train model for multiple epochs
65 |         for epoch in range(self.start_epoch, self.epochs):
66 |             # Train one epoch
67 |             print("Training...")
68 |             self.model.train()  # Turn on BatchNorm & Dropout
69 |             start = time.time()
70 |             tr_avg_loss = self._run_one_epoch(epoch)
71 |             print('-' * 85)
72 |             print('Train Summary | End of Epoch {0} | Time {1:.2f}s | '
73 |                   'Train Loss {2:.3f}'.format(
74 |                       epoch + 1, time.time() - start, tr_avg_loss))
75 |             print('-' * 85)
76 | 
77 |             # Save model each epoch
78 |             if self.checkpoint:
79 |                 file_path = os.path.join(
80 |                     self.save_folder, 'epoch%d.pth.tar' % (epoch + 1))
81 |                 torch.save(self.model.serialize(self.model, self.optimizer, epoch + 1,
82 |                                                 tr_loss=self.tr_loss,
83 |                                                 cv_loss=self.cv_loss),
84 |                            file_path)
85 |                 print('Saving checkpoint model to %s' % file_path)
86 | 
87 |             # Cross validation
88 |             print('Cross validation...')
89 |             self.model.eval()  # Turn off Batchnorm & Dropout
90 |             val_loss = self._run_one_epoch(epoch, cross_valid=True)
91 |             print('-' * 85)
92 |             print('Valid Summary | End of Epoch {0} | Time {1:.2f}s | '
93 |                   'Valid Loss {2:.3f}'.format(
94 |                       epoch + 1, time.time() - start, val_loss))
95 |             print('-' * 85)
96 | 
97 |             # Adjust learning rate (halving)
98 |             if self.half_lr and val_loss >= self.prev_val_loss:
99 |                 if self.early_stop and self.halving:
100 |                     print("Learning rate was already halved, but improvement "
101 |                           "is still too small; stopping training early.")
102 |                     break
103 |                 self.halving = True
104 |             if self.halving:
105 |                 optim_state = self.optimizer.state_dict()
106 |                 optim_state['param_groups'][0]['lr'] = \
107 |                     optim_state['param_groups'][0]['lr'] / 2.0
108 |                 self.optimizer.load_state_dict(optim_state)
109 |                 print('Learning rate adjusted to: {lr:.6f}'.format(
110 |                     lr=optim_state['param_groups'][0]['lr']))
111 |             self.prev_val_loss = val_loss
112 | 
113 |             # Save the best model
114 |             self.tr_loss[epoch] = tr_avg_loss
115 |             self.cv_loss[epoch] = val_loss
116 |             if val_loss < self.best_val_loss:
117 |                 self.best_val_loss = val_loss
118 |                 file_path = os.path.join(self.save_folder, self.model_path)
119 |                 torch.save(self.model.serialize(self.model, self.optimizer, epoch + 1,
120 |                                                 tr_loss=self.tr_loss,
121 |                                                 cv_loss=self.cv_loss),
122 |                            file_path)
123 |                 print("Found better validated model, saving to %s" % file_path)
124 | 
125 |             # visualizing loss using visdom
126 |             if self.visdom:
127 |                 x_axis = self.vis_epochs[0:epoch + 1]
128 |                 y_axis = torch.stack(
129 |                     (self.tr_loss[0:epoch + 1], self.cv_loss[0:epoch + 1]), dim=1)
130 |                 if self.vis_window is None:
131 |                     self.vis_window = self.vis.line(
132 |                         X=x_axis,
133 |                         Y=y_axis,
134 |                         opts=self.vis_opts,
135 |                     )
136 |                 else:
137 |                     self.vis.line(
138 |                         X=x_axis.unsqueeze(0).expand(y_axis.size(
139 |                             1), x_axis.size(0)).transpose(0, 1),  # Visdom fix
140 |                         Y=y_axis,
141 |                         win=self.vis_window,
142 |                         update='replace',
143 |                     )
144 | 
145 |     def _run_one_epoch(self, epoch, cross_valid=False):
146 |         start = time.time()
147 |         total_loss = 0
148 | 
149 |         data_loader = self.tr_loader if not cross_valid else self.cv_loader
150 | 
151 |         # visualizing loss using visdom
152 |         if self.visdom and not cross_valid:
153 |             vis_opts_epoch = dict(title=self.visdom_id + " epoch " + str(epoch),
154 |                                   ylabel='Loss', xlabel='Epoch')
155 |             vis_window_epoch = None
156 |             vis_iters = torch.arange(1, len(data_loader) + 1)
157 |             vis_iters_loss = torch.Tensor(len(data_loader))
158 | 
159 |         for i, (data) in enumerate(data_loader):
160 |             padded_input, input_lengths, padded_target = data
161 |             padded_input = padded_input.cuda()
162 |             input_lengths = input_lengths.cuda()
163 |             padded_target = padded_target.cuda()
164 |             loss = self.model(padded_input, input_lengths, padded_target)
165 |             if not cross_valid:
166 |                 self.optimizer.zero_grad()
167 |                 loss.backward()
168 |                 grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(),
169 |                                                            self.max_norm)
170 |                 self.optimizer.step()
171 | 
172 |             total_loss += loss.item()
173 | 
174 |             if i % self.print_freq == 0:
175 |                 print('Epoch {0} | Iter {1} | Average Loss {2:.3f} | '
176 |                       'Current Loss {3:.6f} | {4:.1f} ms/batch'.format(
177 |                           epoch + 1, i + 1, total_loss / (i + 1),
178 |                           loss.item(), 1000 * (time.time() - start) / (i + 1)),
179 |                       flush=True)
180 | 
181 |             # visualizing loss using visdom
182 |             if self.visdom and not cross_valid:
183 |                 vis_iters_loss[i] = loss.item()
184 |                 if i % self.print_freq == 0:
185 |                     x_axis = vis_iters[:i+1]
186 |                     y_axis = vis_iters_loss[:i+1]
187 |                     if vis_window_epoch is None:
188 |                         vis_window_epoch = self.vis.line(X=x_axis, Y=y_axis,
189 |                                                          opts=vis_opts_epoch)
190 |                     else:
191 |                         self.vis.line(X=x_axis, Y=y_axis, win=vis_window_epoch,
192 |                                       update='replace')
193 | 
194 |         return total_loss / (i + 1)
195 | 
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaituoxu/Listen-Attend-Spell/b43ce63eaf68252fae2056bfcbbdef18c4be2340/src/utils/__init__.py
--------------------------------------------------------------------------------
/src/utils/data2json.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
4 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
5 | 
6 | . ./path.sh
7 | 
8 | nlsyms=""
9 | lang=""
10 | feat="" # feat.scp
11 | oov="<unk>"
12 | bpecode=""
13 | verbose=0
14 | 
15 | . utils/parse_options.sh
16 | 
17 | if [ $# != 2 ]; then
18 |     echo "Usage: $0 <data-dir> <dict>";
19 |     exit 1;
20 | fi
21 | 
22 | dir=$1
23 | dic=$2
24 | tmpdir=`mktemp -d ${dir}/tmp-XXXXX`
25 | rm -f ${tmpdir}/*.scp
26 | 
27 | # input; this is not necessary for decoding mode, so it is made an option
28 | if [ ! -z ${feat} ]; then
29 |     if [ ${verbose} -eq 0 ]; then
30 |         utils/data/get_utt2num_frames.sh ${dir} &> /dev/null
31 |         cp ${dir}/utt2num_frames ${tmpdir}/ilen.scp
32 |         feat-to-dim scp:${feat} ark,t:${tmpdir}/idim.scp &> /dev/null
33 |     else
34 |         utils/data/get_utt2num_frames.sh ${dir}
35 |         cp ${dir}/utt2num_frames ${tmpdir}/ilen.scp
36 |         feat-to-dim scp:${feat} ark,t:${tmpdir}/idim.scp
37 |     fi
38 | fi
39 | 
40 | # output
41 | if [ ! -z ${bpecode} ]; then
42 |     paste -d " " <(awk '{print $1}' ${dir}/text) <(cut -f 2- -d" " ${dir}/text | spm_encode --model=${bpecode} --output_format=piece) > ${tmpdir}/token.scp
43 | elif [ ! -z ${nlsyms} ]; then
44 |     text2token.py -s 1 -n 1 -l ${nlsyms} ${dir}/text > ${tmpdir}/token.scp
45 | else
46 |     text2token.py -s 1 -n 1 ${dir}/text > ${tmpdir}/token.scp
47 | fi
48 | cat ${tmpdir}/token.scp | utils/sym2int.pl --map-oov ${oov} -f 2- ${dic} > ${tmpdir}/tokenid.scp
49 | cat ${tmpdir}/tokenid.scp | awk '{print $1 " " NF-1}' > ${tmpdir}/olen.scp
50 | # +1 comes from 0-based dictionary
51 | vocsize=`tail -n 1 ${dic} | awk '{print $2}'`
52 | odim=`echo "$vocsize + 1" | bc`
53 | awk -v odim=${odim} '{print $1 " " odim}' ${dir}/text > ${tmpdir}/odim.scp
54 | 
55 | # others
56 | if [ ! -z ${lang} ]; then
57 |     awk -v lang=${lang} '{print $1 " " lang}' ${dir}/text > ${tmpdir}/lang.scp
58 | fi
59 | # feats
60 | cat ${feat} > ${tmpdir}/feat.scp
61 | 
62 | rm -f ${tmpdir}/*.json
63 | for x in ${dir}/text ${dir}/utt2spk ${tmpdir}/*.scp; do
64 |     k=`basename ${x} .scp`
65 |     cat ${x} | scp2json.py --key ${k} > ${tmpdir}/${k}.json
66 | done
67 | mergejson.py --verbose ${verbose} ${tmpdir}/*.json
68 | 
69 | rm -fr ${tmpdir}
70 | 
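For orientation, the merged `data.json` that `data2json.sh` emits (and that `data.py`, `recognize.py`, and `json2trn.py` consume) has roughly the following shape; the utterance id, dims, and token ids below are illustrative only:

```json
{
  "utts": {
    "BAC009S0002W0122": {
      "input": [{
        "feat": "/path/to/dump/train/deltatrue/feats.1.ark:7",
        "name": "input1",
        "shape": [477, 240]
      }],
      "output": [{
        "name": "target1",
        "shape": [14, 4233],
        "text": "...",
        "token": "...",
        "tokenid": "58 1886 ..."
      }],
      "utt2spk": "S0002"
    }
  }
}
```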
--------------------------------------------------------------------------------
/src/utils/dump.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2017 Nagoya University (Tomoki Hayashi)
4 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
5 | 
6 | . ./path.sh
7 | 
8 | cmd=run.pl
9 | do_delta=false
10 | nj=1
11 | verbose=0
12 | compress=true
13 | write_utt2num_frames=true
14 | 
15 | . utils/parse_options.sh
16 | 
17 | scp=$1
18 | cvmnark=$2
19 | logdir=$3
20 | dumpdir=$4
21 | 
22 | if [ $# != 4 ]; then
23 |     echo "Usage: $0 <scp> <cvmnark> <logdir> <dumpdir>"
24 |     exit 1;
25 | fi
26 | 
27 | mkdir -p $logdir
28 | mkdir -p $dumpdir
29 | 
30 | dumpdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' ${dumpdir} ${PWD}`
31 | 
32 | for n in $(seq $nj); do
33 |     # the next command does nothing unless $dumpdir/storage/ exists, see
34 |     # utils/create_data_link.pl for more info.
35 |     utils/create_data_link.pl ${dumpdir}/feats.${n}.ark
36 | done
37 | 
38 | if $write_utt2num_frames; then
39 |     write_num_frames_opt="--write-num-frames=ark,t:$dumpdir/utt2num_frames.JOB"
40 | else
41 |     write_num_frames_opt=
42 | fi
43 | 
44 | # split scp file
45 | split_scps=""
46 | for n in $(seq $nj); do
47 |     split_scps="$split_scps $logdir/feats.$n.scp"
48 | done
49 | 
50 | utils/split_scp.pl $scp $split_scps || exit 1;
51 | 
52 | # dump features
53 | if ${do_delta};then
54 |     $cmd JOB=1:$nj $logdir/dump_feature.JOB.log \
55 |         apply-cmvn --norm-vars=true $cvmnark scp:$logdir/feats.JOB.scp ark:- \| \
56 |         add-deltas ark:- ark:- \| \
57 |         copy-feats --compress=$compress --compression-method=2 ${write_num_frames_opt} \
58 |         ark:- ark,scp:${dumpdir}/feats.JOB.ark,${dumpdir}/feats.JOB.scp \
59 |         || exit 1
60 | else
61 |     $cmd JOB=1:$nj $logdir/dump_feature.JOB.log \
62 |         apply-cmvn --norm-vars=true $cvmnark scp:$logdir/feats.JOB.scp ark:- \| \
63 |         copy-feats --compress=$compress --compression-method=2 ${write_num_frames_opt} \
64 |         ark:- ark,scp:${dumpdir}/feats.JOB.ark,${dumpdir}/feats.JOB.scp \
65 |         || exit 1
66 | fi
67 | 
68 | # concatenate scp files
69 | for n in $(seq $nj); do
70 |     cat $dumpdir/feats.$n.scp || exit 1;
71 | done > $dumpdir/feats.scp || exit 1
72 | 
73 | if $write_utt2num_frames; then
74 |     for n in $(seq $nj); do
75 |         cat $dumpdir/utt2num_frames.$n || exit 1;
76 |     done > $dumpdir/utt2num_frames || exit 1
77 |     rm $dumpdir/utt2num_frames.* 2>/dev/null
78 | fi
79 | 
80 | # remove temp scps
81 | rm $logdir/feats.*.scp 2>/dev/null
82 | if [ ${verbose} -eq 1 ]; then
83 |     echo "Succeeded dumping features for training"
84 | fi
85 | 
--------------------------------------------------------------------------------
/src/utils/filt.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Apache 2.0
4 | 
5 | import sys
6 | import argparse
7 | 
8 | if __name__ == '__main__':
9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument('--exclude', '-v', dest='exclude',
11 |                         action='store_true', help='exclude filter words')
12 |     parser.add_argument('filt', type=str, help='filter list')
13 |     parser.add_argument('infile', type=str, help='input file')
14 |     args = parser.parse_args()
15 | 
16 |     vocab = set()
17 |     with open(args.filt) as vocabfile:
18 |         for line in vocabfile:
19 |             vocab.add(line.strip())
20 | 
21 |     with open(args.infile) as textfile:
22 |         for line in textfile:
23 |             if args.exclude:
24 |                 print(" ".join(
25 |                     map(lambda word: word if not word in vocab else '', line.strip().split())))
26 |             # else:
27 |             #     print(" ".join(map(lambda word: word if word in vocab else '<UNK>', unicode(line, 'utf_8').strip().split())).encode('utf_8'))
28 |
--------------------------------------------------------------------------------
/src/utils/json2trn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
5 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
6 | 
7 | import json
8 | import argparse
9 | import logging
10 | from utils import process_dict
11 | 
12 | if __name__ == '__main__':
13 |     parser = argparse.ArgumentParser()
14 |     parser.add_argument('json', type=str, help='json files')
15 |     parser.add_argument('dict', type=str, help='dict')
16 |     parser.add_argument('ref', type=str, help='ref')
17 |     parser.add_argument('hyp', type=str, help='hyp')
18 |     args = parser.parse_args()
19 | 
20 |     # logging info
21 |     logging.basicConfig(
22 |         level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
23 | 
24 |     logging.info("reading %s", args.json)
25 |     with open(args.json, 'r') as f:
26 |         j = json.load(f)
27 | 
28 |     logging.info("reading %s", args.dict)
29 |     char_list, sos_id, eos_id = process_dict(args.dict)
30 |     # with open(args.dict, 'r') as f:
31 |     #     dictionary = f.readlines()
32 |     # char_list = [unicode(entry.split(' ')[0], 'utf_8') for entry in dictionary]
33 |     # char_list.insert(0, '<blank>')
34 |     # char_list.append('<eos>')
35 |     # print([x.encode('utf-8') for x in char_list])
36 | 
37 |     logging.info("writing hyp trn to %s", args.hyp)
38 |     logging.info("writing ref trn to %s", args.ref)
39 |     h = open(args.hyp, 'w')
40 |     r = open(args.ref, 'w')
41 | 
42 |     for x in j['utts']:
43 |         seq = [char_list[int(i)] for i in j['utts'][x]
44 |                ['output'][0]['rec_tokenid'].split()]
45 |         h.write(" ".join(seq).replace('<eos>', ''))
46 |         h.write(
47 |             " (" + j['utts'][x]['utt2spk'].replace('-', '_') + "-" + x + ")\n")
48 | 
49 |         seq = [char_list[int(i)] for i in j['utts'][x]
50 |                ['output'][0]['tokenid'].split()]
51 |         r.write(" ".join(seq).replace('<eos>', ''))
52 |         r.write(
53 |             " (" + j['utts'][x]['utt2spk'].replace('-', '_') + "-" + x + ")\n")
54 |
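The loop above writes sclite-style `.trn` lines: space-separated tokens with the `<eos>` symbol stripped, followed by `(<speaker>-<utterance-id>)` where hyphens in the speaker id become underscores. A tiny sketch of the string one iteration produces (token sequence and ids are hypothetical):

```python
# Mirrors one iteration of the loop above; all values are hypothetical.
seq = ["喂", "喂", "你", "<eos>"]
utt2spk, utt_id = "speakerA-B", "utt-0001"
trn_line = (" ".join(seq).replace("<eos>", "")
            + " (" + utt2spk.replace("-", "_") + "-" + utt_id + ")\n")
print(repr(trn_line))  # '喂 喂 你  (speakerA_B-utt-0001)\n'
```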
--------------------------------------------------------------------------------
/src/utils/mergejson.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # encoding: utf-8
3 | 
4 | # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
5 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
6 | 
7 | import argparse
8 | import json
9 | import logging
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     parser = argparse.ArgumentParser()
14 |     parser.add_argument('jsons', type=str, nargs='+',
15 |                         help='json files')
16 |     parser.add_argument('--multi', '-m', type=int,
17 |                         help='Test the json file for multiple input/output', default=0)
18 |     parser.add_argument('--verbose', '-V', default=0, type=int,
19 |                         help='Verbose option')
20 |     args = parser.parse_args()
21 | 
22 |     # logging info
23 |     if args.verbose > 0:
24 |         logging.basicConfig(
25 |             level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
26 |     else:
27 |         logging.basicConfig(
28 |             level=logging.WARN, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
29 | 
30 |     # make intersection set for utterance keys
31 |     js = []
32 |     intersec_ks = []
33 |     for x in args.jsons:
34 |         with open(x, 'r') as f:
35 |             j = json.load(f)
36 |         ks = j['utts'].keys()
37 |         logging.info(x + ': has ' + str(len(ks)) + ' utterances')
38 |         if len(intersec_ks) > 0:
39 |             intersec_ks = intersec_ks.intersection(set(ks))
40 |         else:
41 |             intersec_ks = set(ks)
42 |         js.append(j)
43 |     logging.info('new json has ' + str(len(intersec_ks)) + ' utterances')
44 | 
45 |     old_dic = dict()
46 |     for k in intersec_ks:
47 |         v = js[0]['utts'][k]
48 |         for j in js[1:]:
49 |             v.update(j['utts'][k])
50 |         old_dic[k] = v
51 | 
52 |     new_dic = dict()
53 |     for id in old_dic:
54 |         dic = old_dic[id]
55 | 
56 |         in_dic = {}
57 |         if dic.has_key(unicode('idim', 'utf-8')):
58 |             in_dic[unicode('shape', 'utf-8')] = (int(dic[unicode('ilen', 'utf-8')]), int(dic[unicode('idim', 'utf-8')]))
59 |         in_dic[unicode('name', 'utf-8')] = unicode('input1', 'utf-8')
60 |         in_dic[unicode('feat', 'utf-8')] = dic[unicode('feat', 'utf-8')]
61 | 
62 |         out_dic = {}
63 |         out_dic[unicode('name', 'utf-8')] = unicode('target1', 'utf-8')
64 |         out_dic[unicode('shape', 'utf-8')] = (int(dic[unicode('olen', 'utf-8')]), int(dic[unicode('odim', 'utf-8')]))
65 |         out_dic[unicode('text', 'utf-8')] = dic[unicode('text', 'utf-8')]
66 |         out_dic[unicode('token', 'utf-8')] = dic[unicode('token', 'utf-8')]
67 |         out_dic[unicode('tokenid', 'utf-8')] = dic[unicode('tokenid', 'utf-8')]
68 | 
69 | 
70 |         new_dic[id] = {unicode('input', 'utf-8'):[in_dic], unicode('output', 'utf-8'):[out_dic],
71 |                        unicode('utt2spk', 'utf-8'):dic[unicode('utt2spk', 'utf-8')]}
72 | 
73 |     # ensure "ensure_ascii=False", which is a bug
74 |     jsonstring = json.dumps({'utts': new_dic}, indent=4, ensure_ascii=False, sort_keys=True).encode('utf_8')
75 |     print(jsonstring)
76 | 
--------------------------------------------------------------------------------
/src/utils/scp2json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # encoding: utf-8
3 | 
4 | # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
5 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
6 | 
7 | import sys
8 | import json
9 | import argparse
10 | 
11 | if __name__ == '__main__':
12 |     parser = argparse.ArgumentParser()
13 |     parser.add_argument('--key', '-k', type=str,
14 |                         help='key')
15 |     args = parser.parse_args()
16 | 
17 |     l = {}
18 |     line = sys.stdin.readline()
19 |     while line:
20 |         x = unicode(line, 'utf_8').rstrip().split()
21 |         v = {args.key: ' '.join(x[1:]).encode('utf_8')}
22 |         l[x[0].encode('utf_8')] = v
23 |         line = sys.stdin.readline()
24 | 
25 |     all_l = {'utts': l}
26 | 
27 |     # ensure "ensure_ascii=False", which is a bug
28 |     jsonstring = json.dumps(all_l, indent=4, ensure_ascii=False)
29 |     print(jsonstring)
30 |
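`scp2json.py` above turns each Kaldi-style scp line (`<utt-id> <value ...>`) into one entry under `utts`. Since the script is Python 2, here is a minimal Python 3 rendering of the same transformation (the `--key` value and scp line are hypothetical):

```python
import json

key = "feat"  # hypothetical --key value
scp_lines = ["utt-0001 /path/to/feats.1.ark:48"]  # "<utt-id> <value ...>"
utts = {}
for line in scp_lines:
    parts = line.rstrip().split()
    utts[parts[0]] = {key: " ".join(parts[1:])}
print(json.dumps({"utts": utts}, indent=4, ensure_ascii=False))
```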
--------------------------------------------------------------------------------
/src/utils/text2token.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | 
3 | # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
4 | # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
5 | 
6 | import sys
7 | import argparse
8 | import re
9 | 
10 | 
11 | def exist_or_not(i, match_pos):
12 |     start_pos = None
13 |     end_pos = None
14 |     for pos in match_pos:
15 |         if pos[0] <= i < pos[1]:
16 |             start_pos = pos[0]
17 |             end_pos = pos[1]
18 |             break
19 | 
20 |     return start_pos, end_pos
21 | 
22 | 
23 | def main():
24 |     parser = argparse.ArgumentParser()
25 |     parser.add_argument('--nchar', '-n', default=1, type=int,
26 |                         help='number of characters to split, i.e., \
27 |                         aabb -> a a b b with -n 1 and aa bb with -n 2')
28 |     parser.add_argument('--skip-ncols', '-s', default=0, type=int,
29 |                         help='skip first n columns')
30 |     parser.add_argument('--space', default='<space>', type=str,
31 |                         help='space symbol')
32 |     parser.add_argument('--non-lang-syms', '-l', default=None, type=str,
33 |                         help='list of non-linguistic symbols, e.g., <NOISE> etc.')
34 |     parser.add_argument('text', type=str, default=False, nargs='?',
35 |                         help='input text')
36 |     args = parser.parse_args()
37 | 
38 |     rs = []
39 |     if args.non_lang_syms is not None:
40 |         with open(args.non_lang_syms, 'r') as f:
41 |             nls = [unicode(x.rstrip(), 'utf_8') for x in f.readlines()]
42 |             rs = [re.compile(re.escape(x)) for x in nls]
43 | 
44 |     if args.text:
45 |         f = open(args.text)
46 |     else:
47 |         f = sys.stdin
48 |     line = f.readline()
49 |     n = args.nchar
50 |     while line:
51 |         x = unicode(line, 'utf_8').split()
52 |         print ' '.join(x[:args.skip_ncols]).encode('utf_8'),
53 |         a = ' '.join(x[args.skip_ncols:])
54 | 
55 |         # get all matched positions
56 |         match_pos = []
57 |         for r in rs:
58 |             i = 0
59 |             while i >= 0:
60 |                 m = r.search(a, i)
61 |                 if m:
62 |                     match_pos.append([m.start(), m.end()])
63 |                     i = m.end()
64 |                 else:
65 |                     break
66 | 
67 |         if len(match_pos) > 0:
68 |             chars = []
69 |             i = 0
70 |             while i < len(a):
71 |                 start_pos, end_pos = exist_or_not(i, match_pos)
72 |                 if start_pos is not None:
73 |                     chars.append(a[start_pos:end_pos])
74 |                     i = end_pos
75 |                 else:
76 |                     chars.append(a[i])
77 |                     i += 1
78 |             a = chars
79 | 
80 |         a = [a[i:i + n] for i in range(0, len(a), n)]
81 | 
82 |         a_flat = []
83 |         for z in a:
84 |             a_flat.append("".join(z))
85 | 
86 |         a_chars = [z.replace(' ', args.space) for z in a_flat]
87 |         print ' '.join(a_chars).encode('utf_8')
88 |         line = f.readline()
89 | 
90 | 
91 | if __name__ == '__main__':
92 |     main()
93 | 
--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | IGNORE_ID = -1
3 | 
4 | 
5 | def pad_list(xs, pad_value):
6 |     # From: espnet/src/nets/e2e_asr_th.py: pad_list()
7 |     n_batch = len(xs)
8 |     max_len = max(x.size(0) for x in xs)
9 |     pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value)
10 |     for i in range(n_batch):
11 |         pad[i, :xs[i].size(0)] = xs[i]
12 |     return pad
13 | 
14 | 
15 | def process_dict(dict_path):
16 |     with open(dict_path, 'rb') as f:
17 |         dictionary = f.readlines()
18 |     char_list = [entry.decode('utf-8').split(' ')[0]
19 |                  for entry in dictionary]
20 |     sos_id = char_list.index('<sos>')
21 |     eos_id = char_list.index('<eos>')
22 |     return char_list, sos_id, eos_id
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     import sys
27 |     path = sys.argv[1]
28 |     char_list, sos_id, eos_id = process_dict(path)
29 |     print(char_list, sos_id, eos_id)
30 | 
31 | # * ------------------ recognition related ------------------ *
32 | 
33 | 
34 | def parse_hypothesis(hyp, char_list):
35 |     """Function to parse hypothesis
36 | 
37 |     :param list hyp: recognition hypothesis
38 |     :param list char_list: list of characters
39 |     :return: recognition text string
40 |     :return: recognition token string
41 |     :return: recognition tokenid string
42 |     """
43 |     # remove sos and get results
44 |     tokenid_as_list = list(map(int, hyp['yseq'][1:]))
45 |     token_as_list = [char_list[idx] for idx in tokenid_as_list]
46 |     score = float(hyp['score'])
47 | 
48 |     # convert to string
49 |     tokenid = " ".join([str(idx) for idx in tokenid_as_list])
50 |     token = " ".join(token_as_list)
51 |     text = "".join(token_as_list).replace('<space>', ' ')
52 | 
53 |     return text, token, tokenid, score
54 | 
55 | 
56 | def add_results_to_json(js, nbest_hyps, char_list):
57 |     """Function to add N-best results to json
58 | 
59 |     :param dict js: groundtruth utterance dict
60 |     :param list nbest_hyps: list of hypotheses
61 |     :param list char_list: list of characters
62 |     :return: N-best results added utterance dict
63 |     """
64 |     # copy old json info
65 |     new_js = dict()
66 |     new_js['utt2spk'] = js['utt2spk']
67 |     new_js['output'] = []
68 | 
69 |     for n, hyp in enumerate(nbest_hyps, 1):
70 |         # parse hypothesis
71 |         rec_text, rec_token, rec_tokenid, score = parse_hypothesis(
72 |             hyp, char_list)
73 | 
74 |         # copy ground-truth
75 |         out_dic = dict(js['output'][0].items())
76 | 
77 |         # update name
78 |         out_dic['name'] += '[%d]' % n
79 | 
80 |         # add recognition results
81 |         out_dic['rec_text'] = rec_text
82 |         out_dic['rec_token'] = rec_token
83 |         out_dic['rec_tokenid'] = rec_tokenid
84 |         out_dic['score'] = score
85 | 
86 |         # add to list of N-best result dicts
87 |
new_js['output'].append(out_dic) 88 | 89 | # show 1-best result 90 | if n == 1: 91 | print('groundtruth: %s' % out_dic['text']) 92 | print('prediction : %s' % out_dic['rec_text']) 93 | 94 | return new_js 95 | -------------------------------------------------------------------------------- /test/data/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "utts": { 3 | "20040503_222707_A000687_B000688-A-000000-000477": { 4 | "input": [ 5 | { 6 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:48", 7 | "name": "input1", 8 | "shape": [ 9 | 475, 10 | 83 11 | ] 12 | } 13 | ], 14 | "output": [ 15 | { 16 | "name": "target1", 17 | "shape": [ 18 | 7, 19 | 3655 20 | ], 21 | "text": "喂喂你叫什么名", 22 | "token": "喂 喂 你 叫 什 么 名", 23 | "tokenid": "640 640 190 491 123 71 513" 24 | } 25 | ], 26 | "utt2spk": "20040503_222707_A000687_B000688-A" 27 | }, 28 | "20040503_222707_A000687_B000688-A-000656-001110": { 29 | "input": [ 30 | { 31 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:40206", 32 | "name": "input1", 33 | "shape": [ 34 | 452, 35 | 83 36 | ] 37 | } 38 | ], 39 | "output": [ 40 | { 41 | "name": "target1", 42 | "shape": [ 43 | 10, 44 | 3655 45 | ], 46 | "text": "[LAUGHTER]王保文啊你是我姓曾", 47 | "token": "[LAUGHTER] 王 保 文 啊 你 是 我 姓 曾", 48 | "tokenid": "28 2140 220 1514 629 190 1570 1281 852 1601" 49 | } 50 | ], 51 | "utt2spk": "20040503_222707_A000687_B000688-A" 52 | }, 53 | "20040503_222707_A000687_B000688-A-001110-001533": { 54 | "input": [ 55 | { 56 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:78455", 57 | "name": "input1", 58 | "shape": [ 59 | 421, 60 | 83 61 | ] 62 | } 63 | ], 64 | "output": [ 65 | { 66 | "name": "target1", 67 | "shape": [ 68 | 6, 69 | 3655 70 | ], 71 | "text": "我姓曾嗯是啊", 72 | "token": "我 姓 曾 嗯 是 啊", 73 | "tokenid": "1281 852 1601 664 1570 629" 74 | } 75 | ], 76 | "utt2spk": "20040503_222707_A000687_B000688-A" 77 | }, 78 | "20040503_222707_A000687_B000688-A-001533-001915": { 79 | "input": [ 80 | { 81 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:114131", 82 | "name": "input1", 83 | "shape": [ 84 | 380, 85 | 83 86 | ] 87 | } 88 | ], 89 | "output": [ 90 | { 91 | "name": "target1", 92 | "shape": [ 93 | 6, 94 | 3655 95 | ], 96 | "text": "啊住在哪里呀", 97 | "token": "啊 住 在 哪 里 呀", 98 | "tokenid": "629 181 718 606 3326 541" 99 | } 100 | ], 101 | "utt2spk": "20040503_222707_A000687_B000688-A" 102 | }, 103 | "20040503_222707_A000687_B000688-A-001915-002472": { 104 | "input": [ 105 | { 106 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:146404", 107 | "name": "input1", 108 | "shape": [ 109 | 555, 110 | 83 111 | ] 112 | } 113 | ], 114 | "output": [ 115 | { 116 | "name": "target1", 117 | "shape": [ 118 | 8, 119 | 3655 120 | ], 121 | "text": "学校呀在什么学校", 122 | "token": "学 校 呀 在 什 么 学 校", 123 | "tokenid": "908 1688 541 718 123 71 908 1688" 124 | } 125 | ], 126 | "utt2spk": "20040503_222707_A000687_B000688-A" 127 | }, 128 | "20040503_222707_A000687_B000688-A-002472-003202": { 129 | "input": [ 130 | { 131 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:193202", 132 | "name": "input1", 133 | "shape": [ 134 | 728, 135 | 83 136 | ] 137 | } 138 | ], 139 | "output": [ 140 | { 141 | "name": "target1", 142 | "shape": [ 143 | 17, 144 | 3655 145 | ], 146 | "text": "深圳大学呀那环境一定很好了不错啊[LAUGHTER]", 147 | "token": "深 圳 大 学 呀 那 环 境 一 定 很 好 了 不 错 啊 
[LAUGHTER]", 148 | "tokenid": "1926 720 796 908 541 3285 2146 771 32 924 1153 827 94 40 3375 629 28" 149 | } 150 | ], 151 | "utt2spk": "20040503_222707_A000687_B000688-A" 152 | }, 153 | "20040503_222707_A000687_B000688-A-003202-003670": { 154 | "input": [ 155 | { 156 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:254359", 157 | "name": "input1", 158 | "shape": [ 159 | 466, 160 | 83 161 | ] 162 | } 163 | ], 164 | "output": [ 165 | { 166 | "name": "target1", 167 | "shape": [ 168 | 14, 169 | 3655 170 | ], 171 | "text": "少一点女生你读的是什么学院啊", 172 | "token": "少 一 点 女 生 你 读 的 是 什 么 学 院 啊", 173 | "tokenid": "970 32 2031 821 2191 190 3049 2251 1570 123 71 908 3437 629" 174 | } 175 | ], 176 | "utt2spk": "20040503_222707_A000687_B000688-A" 177 | }, 178 | "20040503_222707_A000687_B000688-A-003670-004193": { 179 | "input": [ 180 | { 181 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:293770", 182 | "name": "input1", 183 | "shape": [ 184 | 521, 185 | 83 186 | ] 187 | } 188 | ], 189 | "output": [ 190 | { 191 | "name": "target1", 192 | "shape": [ 193 | 8, 194 | 3655 195 | ], 196 | "text": "工程啊挺好呀[NOISE][LAUGHTER]", 197 | "token": "工 程 啊 挺 好 呀 [NOISE] [LAUGHTER]", 198 | "tokenid": "1039 2399 629 1396 827 541 29 28" 199 | } 200 | ], 201 | "utt2spk": "20040503_222707_A000687_B000688-A" 202 | }, 203 | "20040503_222707_A000687_B000688-A-004502-005015": { 204 | "input": [ 205 | { 206 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:337746", 207 | "name": "input1", 208 | "shape": [ 209 | 511, 210 | 83 211 | ] 212 | } 213 | ], 214 | "output": [ 215 | { 216 | "name": "target1", 217 | "shape": [ 218 | 7, 219 | 3655 220 | ], 221 | "text": "[LAUGHTER]先聊下手机吧", 222 | "token": "[LAUGHTER] 先 聊 下 手 机 吧", 223 | "tokenid": "28 277 2633 39 1296 1622 526" 224 | } 225 | ], 226 | "utt2spk": "20040503_222707_A000687_B000688-A" 227 | }, 228 | "20040503_222707_A000687_B000688-A-005015-005503": { 229 | "input": [ 230 | { 231 | "feat": "/home/ktxu/workspace/espnet/egs/hkust/asr1/dump/dev/deltafalse/feats.1.ark:380892", 232 | "name": "input1", 233 | "shape": [ 234 | 486, 235 | 83 236 | ] 237 | } 238 | ], 239 | "output": [ 240 | { 241 | "name": "target1", 242 | "shape": [ 243 | 1, 244 | 3655 245 | ], 246 | "text": "好", 247 | "token": "好", 248 | "tokenid": "827" 249 | } 250 | ], 251 | "utt2spk": "20040503_222707_A000687_B000688-A" 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /test/data/train_nodup_sp_units.txt: -------------------------------------------------------------------------------- 1 | 1 2 | A 2 3 | B 3 4 | C 4 5 | D 5 6 | E 6 7 | F 7 8 | G 8 9 | H 9 10 | I 10 11 | J 11 12 | K 12 13 | L 13 14 | M 14 15 | N 15 16 | O 16 17 | P 17 18 | Q 18 19 | R 19 20 | S 20 21 | T 21 22 | U 22 23 | V 23 24 | W 24 25 | X 25 26 | Y 26 27 | Z 27 28 | [LAUGHTER] 28 29 | [NOISE] 29 30 | [VOCALIZED-NOISE] 30 31 | · 31 32 | 一 32 33 | 丁 33 34 | 七 34 35 | 万 35 36 | 丈 36 37 | 三 37 38 | 上 38 39 | 下 39 40 | 不 40 41 | 与 41 42 | 丐 42 43 | 丑 43 44 | 专 44 45 | 且 45 46 | 世 46 47 | 丘 47 48 | 业 48 49 | 丛 49 50 | 东 50 51 | 丝 51 52 | 丞 52 53 | 丢 53 54 | 两 54 55 | 严 55 56 | 丧 56 57 | 个 57 58 | 丫 58 59 | 中 59 60 | 丰 60 61 | 串 61 62 | 临 62 63 | 丸 63 64 | 丹 64 65 | 为 65 66 | 主 66 67 | 丽 67 68 | 举 68 69 | 乃 69 70 | 久 70 71 | 么 71 72 | 义 72 73 | 之 73 74 | 乌 74 75 | 乎 75 76 | 乏 76 77 | 乐 77 78 | 乒 78 79 | 乓 79 80 | 乔 80 81 | 乖 81 82 | 乘 82 83 | 乙 83 84 | 九 84 85 | 乞 85 86 | 也 86 87 | 习 87 88 | 乡 
88 89 | 书 89 90 | 买 90 91 | 乱 91 92 | 乳 92 93 | 乾 93 94 | 了 94 95 | 予 95 96 | 争 96 97 | 事 97 98 | 二 98 99 | 于 99 100 | 亏 100 101 | 云 101 102 | 互 102 103 | 五 103 104 | 井 104 105 | 亚 105 106 | 些 106 107 | 亡 107 108 | 亢 108 109 | 交 109 110 | 亦 110 111 | 产 111 112 | 亨 112 113 | 亩 113 114 | 享 114 115 | 京 115 116 | 亭 116 117 | 亮 117 118 | 亲 118 119 | 亳 119 120 | 亵 120 121 | 人 121 122 | 亿 122 123 | 什 123 124 | 仁 124 125 | 仃 125 126 | 仄 126 127 | 仅 127 128 | 仇 128 129 | 今 129 130 | 介 130 131 | 仍 131 132 | 从 132 133 | 仑 133 134 | 仓 134 135 | 仔 135 136 | 他 136 137 | 仗 137 138 | 付 138 139 | 仙 139 140 | 代 140 141 | 令 141 142 | 以 142 143 | 仨 143 144 | 仪 144 145 | 们 145 146 | 仰 146 147 | 仲 147 148 | 件 148 149 | 价 149 150 | 任 150 151 | 份 151 152 | 仿 152 153 | 企 153 154 | 伊 154 155 | 伍 155 156 | 伏 156 157 | 伐 157 158 | 休 158 159 | 众 159 160 | 优 160 161 | 伙 161 162 | 会 162 163 | 伞 163 164 | 伟 164 165 | 传 165 166 | 伤 166 167 | 伦 167 168 | 伪 168 169 | 伯 169 170 | 估 170 171 | 伴 171 172 | 伶 172 173 | 伸 173 174 | 伺 174 175 | 似 175 176 | 伽 176 177 | 佃 177 178 | 但 178 179 | 位 179 180 | 低 180 181 | 住 181 182 | 佐 182 183 | 佑 183 184 | 体 184 185 | 何 185 186 | 佗 186 187 | 余 187 188 | 佛 188 189 | 作 189 190 | 你 190 191 | 佩 191 192 | 佬 192 193 | 佰 193 194 | 佳 194 195 | 使 195 196 | 侃 196 197 | 侄 197 198 | 侈 198 199 | 例 199 200 | 供 200 201 | 依 201 202 | 侠 202 203 | 侣 203 204 | 侥 204 205 | 侦 205 206 | 侧 206 207 | 侨 207 208 | 侮 208 209 | 侯 209 210 | 侵 210 211 | 便 211 212 | 促 212 213 | 俄 213 214 | 俊 214 215 | 俏 215 216 | 俐 216 217 | 俑 217 218 | 俗 218 219 | 俘 219 220 | 保 220 221 | 俞 221 222 | 俟 222 223 | 信 223 224 | 俩 224 225 | 俭 225 226 | 修 226 227 | 俯 227 228 | 俱 228 229 | 俺 229 230 | 倍 230 231 | 倒 231 232 | 倔 232 233 | 倘 233 234 | 候 234 235 | 倚 235 236 | 倜 236 237 | 借 237 238 | 倡 238 239 | 倦 239 240 | 倩 240 241 | 倪 241 242 | 倭 242 243 | 债 243 244 | 值 244 245 | 倾 245 246 | 假 246 247 | 偏 247 248 | 做 248 249 | 停 249 250 | 健 250 251 | 偶 251 252 | 偷 252 253 | 偾 253 254 | 偿 254 255 | 傀 255 256 | 傅 256 257 | 傍 257 258 | 傣 258 259 | 傥 259 260 | 储 260 261 | 催 261 262 | 傲 262 263 | 傻 263 264 | 像 264 265 | 僚 265 266 | 僧 266 267 | 僵 267 268 | 僻 268 269 | 儒 269 270 | 儡 270 271 | 儿 271 272 | 允 272 273 | 元 273 274 | 兄 274 275 | 充 275 276 | 兆 276 277 | 先 277 278 | 光 278 279 | 克 279 280 | 免 280 281 | 兔 281 282 | 党 282 283 | 兜 283 284 | 入 284 285 | 全 285 286 | 八 286 287 | 公 287 288 | 六 288 289 | 兮 289 290 | 兰 290 291 | 共 291 292 | 关 292 293 | 兴 293 294 | 兵 294 295 | 其 295 296 | 具 296 297 | 典 297 298 | 养 298 299 | 兼 299 300 | 兽 300 301 | 内 301 302 | 冉 302 303 | 册 303 304 | 再 304 305 | 冒 305 306 | 冕 306 307 | 写 307 308 | 军 308 309 | 农 309 310 | 冠 310 311 | 冤 311 312 | 冥 312 313 | 冬 313 314 | 冯 314 315 | 冰 315 316 | 冲 316 317 | 决 317 318 | 况 318 319 | 冶 319 320 | 冷 320 321 | 冻 321 322 | 净 322 323 | 凄 323 324 | 准 324 325 | 凉 325 326 | 凋 326 327 | 凌 327 328 | 减 328 329 | 凑 329 330 | 凝 330 331 | 几 331 332 | 凡 332 333 | 凤 333 334 | 凭 334 335 | 凯 335 336 | 凰 336 337 | 凳 337 338 | 凶 338 339 | 凸 339 340 | 出 340 341 | 击 341 342 | 函 342 343 | 凿 343 344 | 刀 344 345 | 刁 345 346 | 分 346 347 | 切 347 348 | 刊 348 349 | 刑 349 350 | 划 350 351 | 列 351 352 | 刘 352 353 | 则 353 354 | 刚 354 355 | 创 355 356 | 初 356 357 | 删 357 358 | 判 358 359 | 刨 359 360 | 利 360 361 | 别 361 362 | 刮 362 363 | 到 363 364 | 制 364 365 | 刷 365 366 | 券 366 367 | 刹 367 368 | 刺 368 369 | 刻 369 370 | 剂 370 371 | 剃 371 372 | 削 372 373 | 前 373 374 | 剑 374 375 | 剔 375 376 | 剖 376 377 | 剥 377 378 | 剧 378 379 | 剩 379 380 | 剪 380 381 | 副 381 382 | 割 382 383 | 剿 383 384 | 劈 384 385 | 力 385 386 | 劝 
386 387 | 办 387 388 | 功 388 389 | 加 389 390 | 务 390 391 | 劣 391 392 | 动 392 393 | 助 393 394 | 努 394 395 | 劫 395 396 | 励 396 397 | 劲 397 398 | 劳 398 399 | 势 399 400 | 勃 400 401 | 勇 401 402 | 勉 402 403 | 勋 403 404 | 勒 404 405 | 勤 405 406 | 勺 406 407 | 勾 407 408 | 勿 408 409 | 匀 409 410 | 包 410 411 | 匆 411 412 | 匈 412 413 | 化 413 414 | 北 414 415 | 匙 415 416 | 匝 416 417 | 匠 417 418 | 匡 418 419 | 匪 419 420 | 匹 420 421 | 区 421 422 | 医 422 423 | 匿 423 424 | 十 424 425 | 千 425 426 | 升 426 427 | 午 427 428 | 卉 428 429 | 半 429 430 | 华 430 431 | 协 431 432 | 卑 432 433 | 卒 433 434 | 卓 434 435 | 单 435 436 | 卖 436 437 | 南 437 438 | 博 438 439 | 卜 439 440 | 占 440 441 | 卡 441 442 | 卢 442 443 | 卤 443 444 | 卦 444 445 | 卧 445 446 | 卫 446 447 | 印 447 448 | 危 448 449 | 即 449 450 | 却 450 451 | 卵 451 452 | 卷 452 453 | 卸 453 454 | 厂 454 455 | 厅 455 456 | 历 456 457 | 厉 457 458 | 压 458 459 | 厌 459 460 | 厕 460 461 | 厘 461 462 | 厚 462 463 | 原 463 464 | 厢 464 465 | 厥 465 466 | 厦 466 467 | 厨 467 468 | 去 468 469 | 县 469 470 | 参 470 471 | 又 471 472 | 叉 472 473 | 及 473 474 | 友 474 475 | 双 475 476 | 反 476 477 | 发 477 478 | 叔 478 479 | 取 479 480 | 受 480 481 | 变 481 482 | 叙 482 483 | 叛 483 484 | 叠 484 485 | 口 485 486 | 古 486 487 | 句 487 488 | 另 488 489 | 叨 489 490 | 只 490 491 | 叫 491 492 | 召 492 493 | 叭 493 494 | 叮 494 495 | 可 495 496 | 台 496 497 | 史 497 498 | 右 498 499 | 叶 499 500 | 号 500 501 | 司 501 502 | 叹 502 503 | 叻 503 504 | 叼 504 505 | 叽 505 506 | 吁 506 507 | 吃 507 508 | 各 508 509 | 合 509 510 | 吉 510 511 | 吊 511 512 | 同 512 513 | 名 513 514 | 后 514 515 | 吏 515 516 | 吐 516 517 | 向 517 518 | 吓 518 519 | 吕 519 520 | 吗 520 521 | 君 521 522 | 吝 522 523 | 吞 523 524 | 吟 524 525 | 否 525 526 | 吧 526 527 | 吨 527 528 | 吩 528 529 | 含 529 530 | 听 530 531 | 吭 531 532 | 启 532 533 | 吱 533 534 | 吴 534 535 | 吵 535 536 | 吸 536 537 | 吹 537 538 | 吻 538 539 | 吼 539 540 | 吾 540 541 | 呀 541 542 | 呃 542 543 | 呆 543 544 | 呈 544 545 | 告 545 546 | 呐 546 547 | 呓 547 548 | 呕 548 549 | 呖 549 550 | 呗 550 551 | 员 551 552 | 呛 552 553 | 呜 553 554 | 呢 554 555 | 呦 555 556 | 周 556 557 | 呱 557 558 | 味 558 559 | 呵 559 560 | 呸 560 561 | 呼 561 562 | 命 562 563 | 咋 563 564 | 和 564 565 | 咎 565 566 | 咏 566 567 | 咐 567 568 | 咒 568 569 | 咔 569 570 | 咕 570 571 | 咖 571 572 | 咙 572 573 | 咚 573 574 | 咣 574 575 | 咦 575 576 | 咧 576 577 | 咨 577 578 | 咪 578 579 | 咬 579 580 | 咯 580 581 | 咱 581 582 | 咳 582 583 | 咸 583 584 | 咽 584 585 | 咿 585 586 | 哀 586 587 | 品 587 588 | 哄 588 589 | 哆 589 590 | 哇 590 591 | 哈 591 592 | 哉 592 593 | 响 593 594 | 哎 594 595 | 哐 595 596 | 哑 596 597 | 哒 597 598 | 哓 598 599 | 哗 599 600 | 哟 600 601 | 哥 601 602 | 哦 602 603 | 哧 603 604 | 哨 604 605 | 哩 605 606 | 哪 606 607 | 哭 607 608 | 哮 608 609 | 哲 609 610 | 哼 610 611 | 哽 611 612 | 唇 612 613 | 唉 613 614 | 唏 614 615 | 唐 615 616 | 唔 616 617 | 唠 617 618 | 唤 618 619 | 唧 619 620 | 唬 620 621 | 售 621 622 | 唯 622 623 | 唰 623 624 | 唱 624 625 | 唷 625 626 | 唾 626 627 | 啃 627 628 | 商 628 629 | 啊 629 630 | 啡 630 631 | 啤 631 632 | 啥 632 633 | 啦 633 634 | 啪 634 635 | 啬 635 636 | 啰 636 637 | 啵 637 638 | 啷 638 639 | 喀 639 640 | 喂 640 641 | 善 641 642 | 喆 642 643 | 喇 643 644 | 喉 644 645 | 喊 645 646 | 喏 646 647 | 喔 647 648 | 喘 648 649 | 喜 649 650 | 喝 650 651 | 喧 651 652 | 喱 652 653 | 喳 653 654 | 喷 654 655 | 喻 655 656 | 喽 656 657 | 嗑 657 658 | 嗓 658 659 | 嗜 659 660 | 嗝 660 661 | 嗡 661 662 | 嗦 662 663 | 嗨 663 664 | 嗯 664 665 | 嗲 665 666 | 嗷 666 667 | 嗽 667 668 | 嘀 668 669 | 嘈 669 670 | 嘉 670 671 | 嘎 671 672 | 嘘 672 673 | 嘛 673 674 | 嘞 674 675 | 嘟 675 676 | 嘠 676 677 | 嘣 677 678 | 嘭 678 679 | 嘱 679 680 | 嘲 680 681 | 嘴 681 682 | 嘶 
682 683 | 嘻 683 684 | 嘿 684 685 | 噌 685 686 | 噎 686 687 | 噔 687 688 | 噜 688 689 | 噢 689 690 | 器 690 691 | 噪 691 692 | 噻 692 693 | 噼 693 694 | 嚓 694 695 | 嚣 695 696 | 嚤 696 697 | 嚯 697 698 | 嚷 698 699 | 嚼 699 700 | 囊 700 701 | 囚 701 702 | 四 702 703 | 回 703 704 | 因 704 705 | 团 705 706 | 囫 706 707 | 园 707 708 | 困 708 709 | 围 709 710 | 囵 710 711 | 固 711 712 | 国 712 713 | 图 713 714 | 圆 714 715 | 圈 715 716 | 土 716 717 | 圣 717 718 | 在 718 719 | 地 719 720 | 圳 720 721 | 场 721 722 | 圾 722 723 | 址 723 724 | 均 724 725 | 坊 725 726 | 坎 726 727 | 坏 727 728 | 坐 728 729 | 坑 729 730 | 块 730 731 | 坚 731 732 | 坛 732 733 | 坝 733 734 | 坞 734 735 | 坟 735 736 | 坠 736 737 | 坡 737 738 | 坤 738 739 | 坦 739 740 | 坪 740 741 | 坷 741 742 | 垂 742 743 | 垃 743 744 | 垄 744 745 | 型 745 746 | 垒 746 747 | 垢 747 748 | 垫 748 749 | 垮 749 750 | 埃 750 751 | 埋 751 752 | 城 752 753 | 域 753 754 | 埠 754 755 | 培 755 756 | 基 756 757 | 堂 757 758 | 堆 758 759 | 堑 759 760 | 堕 760 761 | 堡 761 762 | 堤 762 763 | 堪 763 764 | 堵 764 765 | 塌 765 766 | 塑 766 767 | 塔 767 768 | 塘 768 769 | 塞 769 770 | 填 770 771 | 境 771 772 | 墅 772 773 | 墉 773 774 | 墓 774 775 | 墙 775 776 | 增 776 777 | 墨 777 778 | 墩 778 779 | 壁 779 780 | 壕 780 781 | 壤 781 782 | 士 782 783 | 壮 783 784 | 声 784 785 | 壳 785 786 | 壶 786 787 | 处 787 788 | 备 788 789 | 复 789 790 | 夏 790 791 | 夕 791 792 | 外 792 793 | 多 793 794 | 夜 794 795 | 够 795 796 | 大 796 797 | 天 797 798 | 太 798 799 | 夫 799 800 | 央 800 801 | 失 801 802 | 头 802 803 | 夷 803 804 | 夸 804 805 | 夹 805 806 | 夺 806 807 | 奇 807 808 | 奈 808 809 | 奉 809 810 | 奋 810 811 | 奏 811 812 | 契 812 813 | 奔 813 814 | 奕 814 815 | 奖 815 816 | 套 816 817 | 奘 817 818 | 奠 818 819 | 奢 819 820 | 奥 820 821 | 女 821 822 | 奴 822 823 | 奶 823 824 | 奷 824 825 | 奸 825 826 | 她 826 827 | 好 827 828 | 如 828 829 | 妃 829 830 | 妄 830 831 | 妆 831 832 | 妇 832 833 | 妈 833 834 | 妍 834 835 | 妒 835 836 | 妓 836 837 | 妖 837 838 | 妙 838 839 | 妞 839 840 | 妥 840 841 | 妨 841 842 | 妮 842 843 | 妲 843 844 | 妹 844 845 | 妻 845 846 | 妾 846 847 | 姆 847 848 | 姊 848 849 | 始 849 850 | 姐 850 851 | 姑 851 852 | 姓 852 853 | 委 853 854 | 姚 854 855 | 姜 855 856 | 姝 856 857 | 姣 857 858 | 姥 858 859 | 姨 859 860 | 姬 860 861 | 姻 861 862 | 姿 862 863 | 威 863 864 | 娃 864 865 | 娇 865 866 | 娘 866 867 | 娜 867 868 | 娟 868 869 | 娥 869 870 | 娱 870 871 | 娴 871 872 | 娶 872 873 | 娼 873 874 | 婆 874 875 | 婉 875 876 | 婊 876 877 | 婚 877 878 | 婪 878 879 | 婴 879 880 | 婷 880 881 | 婿 881 882 | 媒 882 883 | 媚 883 884 | 媛 884 885 | 媲 885 886 | 媳 886 887 | 嫁 887 888 | 嫂 888 889 | 嫉 889 890 | 嫌 890 891 | 嫖 891 892 | 嫣 892 893 | 嫦 893 894 | 嫩 894 895 | 嬅 895 896 | 嬉 896 897 | 子 897 898 | 孔 898 899 | 孕 899 900 | 字 900 901 | 存 901 902 | 孙 902 903 | 孜 903 904 | 孝 904 905 | 孟 905 906 | 季 906 907 | 孤 907 908 | 学 908 909 | 孩 909 910 | 孰 910 911 | 孽 911 912 | 宁 912 913 | 它 913 914 | 宅 914 915 | 宇 915 916 | 守 916 917 | 安 917 918 | 宋 918 919 | 完 919 920 | 宏 920 921 | 宗 921 922 | 官 922 923 | 宙 923 924 | 定 924 925 | 宛 925 926 | 宜 926 927 | 宝 927 928 | 实 928 929 | 宠 929 930 | 审 930 931 | 客 931 932 | 宣 932 933 | 室 933 934 | 宦 934 935 | 宪 935 936 | 宫 936 937 | 宰 937 938 | 害 938 939 | 宴 939 940 | 宵 940 941 | 家 941 942 | 容 942 943 | 宽 943 944 | 宾 944 945 | 宿 945 946 | 寂 946 947 | 寄 947 948 | 密 948 949 | 寇 949 950 | 富 950 951 | 寒 951 952 | 寓 952 953 | 寝 953 954 | 寞 954 955 | 察 955 956 | 寡 956 957 | 寥 957 958 | 寨 958 959 | 寸 959 960 | 对 960 961 | 寺 961 962 | 寻 962 963 | 导 963 964 | 寿 964 965 | 封 965 966 | 射 966 967 | 将 967 968 | 尊 968 969 | 小 969 970 | 少 970 971 | 尔 971 972 | 尖 972 973 | 尘 973 974 | 尚 974 975 | 尝 975 976 | 尢 976 977 | 尤 977 978 | 尬 
978 979 | 就 979 980 | 尴 980 981 | 尸 981 982 | 尹 982 983 | 尺 983 984 | 尼 984 985 | 尽 985 986 | 尾 986 987 | 尿 987 988 | 局 988 989 | 屁 989 990 | 层 990 991 | 居 991 992 | 屈 992 993 | 届 993 994 | 屋 994 995 | 屌 995 996 | 屎 996 997 | 屏 997 998 | 展 998 999 | 属 999 1000 | 屠 1000 1001 | 屡 1001 1002 | 履 1002 1003 | 屯 1003 1004 | 山 1004 1005 | 屹 1005 1006 | 屿 1006 1007 | 岁 1007 1008 | 岂 1008 1009 | 岔 1009 1010 | 岗 1010 1011 | 岚 1011 1012 | 岛 1012 1013 | 岩 1013 1014 | 岭 1014 1015 | 岳 1015 1016 | 岸 1016 1017 | 峙 1017 1018 | 峡 1018 1019 | 峨 1019 1020 | 峪 1020 1021 | 峭 1021 1022 | 峰 1022 1023 | 峻 1023 1024 | 崂 1024 1025 | 崇 1025 1026 | 崎 1026 1027 | 崔 1027 1028 | 崖 1028 1029 | 崛 1029 1030 | 崩 1030 1031 | 嵋 1031 1032 | 嵩 1032 1033 | 嵬 1033 1034 | 巍 1034 1035 | 川 1035 1036 | 州 1036 1037 | 巡 1037 1038 | 巢 1038 1039 | 工 1039 1040 | 左 1040 1041 | 巧 1041 1042 | 巨 1042 1043 | 巩 1043 1044 | 巫 1044 1045 | 差 1045 1046 | 己 1046 1047 | 已 1047 1048 | 巴 1048 1049 | 巷 1049 1050 | 巾 1050 1051 | 币 1051 1052 | 市 1052 1053 | 布 1053 1054 | 帅 1054 1055 | 帆 1055 1056 | 师 1056 1057 | 希 1057 1058 | 帐 1058 1059 | 帕 1059 1060 | 帖 1060 1061 | 帘 1061 1062 | 帜 1062 1063 | 帝 1063 1064 | 带 1064 1065 | 席 1065 1066 | 帮 1066 1067 | 常 1067 1068 | 帼 1068 1069 | 帽 1069 1070 | 幅 1070 1071 | 幌 1071 1072 | 幕 1072 1073 | 幢 1073 1074 | 干 1074 1075 | 平 1075 1076 | 年 1076 1077 | 并 1077 1078 | 幸 1078 1079 | 幺 1079 1080 | 幻 1080 1081 | 幼 1081 1082 | 幽 1082 1083 | 广 1083 1084 | 庄 1084 1085 | 庆 1085 1086 | 庇 1086 1087 | 床 1087 1088 | 序 1088 1089 | 庐 1089 1090 | 库 1090 1091 | 应 1091 1092 | 底 1092 1093 | 店 1093 1094 | 庙 1094 1095 | 庚 1095 1096 | 府 1096 1097 | 庞 1097 1098 | 废 1098 1099 | 度 1099 1100 | 座 1100 1101 | 庭 1101 1102 | 庵 1102 1103 | 庶 1103 1104 | 康 1104 1105 | 庸 1105 1106 | 廉 1106 1107 | 廊 1107 1108 | 廓 1108 1109 | 廖 1109 1110 | 延 1110 1111 | 廷 1111 1112 | 建 1112 1113 | 开 1113 1114 | 异 1114 1115 | 弃 1115 1116 | 弄 1116 1117 | 弊 1117 1118 | 式 1118 1119 | 弓 1119 1120 | 引 1120 1121 | 弗 1121 1122 | 弘 1122 1123 | 弟 1123 1124 | 张 1124 1125 | 弥 1125 1126 | 弦 1126 1127 | 弧 1127 1128 | 弩 1128 1129 | 弯 1129 1130 | 弱 1130 1131 | 弹 1131 1132 | 强 1132 1133 | 归 1133 1134 | 当 1134 1135 | 录 1135 1136 | 彗 1136 1137 | 彝 1137 1138 | 形 1138 1139 | 彤 1139 1140 | 彦 1140 1141 | 彩 1141 1142 | 彪 1142 1143 | 彬 1143 1144 | 彭 1144 1145 | 影 1145 1146 | 役 1146 1147 | 彻 1147 1148 | 彼 1148 1149 | 往 1149 1150 | 征 1150 1151 | 径 1151 1152 | 待 1152 1153 | 很 1153 1154 | 徊 1154 1155 | 律 1155 1156 | 徐 1156 1157 | 徒 1157 1158 | 得 1158 1159 | 徘 1159 1160 | 徙 1160 1161 | 御 1161 1162 | 循 1162 1163 | 微 1163 1164 | 德 1164 1165 | 徽 1165 1166 | 心 1166 1167 | 必 1167 1168 | 忆 1168 1169 | 忌 1169 1170 | 忍 1170 1171 | 忏 1171 1172 | 忒 1172 1173 | 志 1173 1174 | 忘 1174 1175 | 忙 1175 1176 | 忠 1176 1177 | 忧 1177 1178 | 快 1178 1179 | 念 1179 1180 | 忽 1180 1181 | 忿 1181 1182 | 怀 1182 1183 | 态 1183 1184 | 怅 1184 1185 | 怆 1185 1186 | 怎 1186 1187 | 怒 1187 1188 | 怕 1188 1189 | 怖 1189 1190 | 怜 1190 1191 | 思 1191 1192 | 怠 1192 1193 | 怡 1193 1194 | 急 1194 1195 | 性 1195 1196 | 怨 1196 1197 | 怪 1197 1198 | 总 1198 1199 | 恋 1199 1200 | 恍 1200 1201 | 恐 1201 1202 | 恒 1202 1203 | 恕 1203 1204 | 恢 1204 1205 | 恤 1205 1206 | 恨 1206 1207 | 恩 1207 1208 | 恭 1208 1209 | 息 1209 1210 | 恰 1210 1211 | 恳 1211 1212 | 恶 1212 1213 | 恺 1213 1214 | 恼 1214 1215 | 恿 1215 1216 | 悄 1216 1217 | 悉 1217 1218 | 悍 1218 1219 | 悔 1219 1220 | 悚 1220 1221 | 悟 1221 1222 | 悠 1222 1223 | 患 1223 1224 | 悦 1224 1225 | 您 1225 1226 | 悬 1226 1227 | 悲 1227 1228 | 悴 1228 1229 | 情 1229 1230 | 惊 1230 1231 | 惋 1231 1232 | 惑 1232 1233 | 惕 1233 1234 | 惜 1234 1235 | 惠 
1235 1236 | 惦 1236 1237 | 惧 1237 1238 | 惨 1238 1239 | 惩 1239 1240 | 惫 1240 1241 | 惬 1241 1242 | 惭 1242 1243 | 惮 1243 1244 | 惯 1244 1245 | 惰 1245 1246 | 想 1246 1247 | 惹 1247 1248 | 愁 1248 1249 | 愉 1249 1250 | 意 1250 1251 | 愚 1251 1252 | 感 1252 1253 | 愣 1253 1254 | 愤 1254 1255 | 愧 1255 1256 | 愿 1256 1257 | 慈 1257 1258 | 慌 1258 1259 | 慎 1259 1260 | 慑 1260 1261 | 慕 1261 1262 | 慢 1262 1263 | 慧 1263 1264 | 慨 1264 1265 | 慰 1265 1266 | 慷 1266 1267 | 憋 1267 1268 | 憎 1268 1269 | 憔 1269 1270 | 憧 1270 1271 | 憬 1271 1272 | 憾 1272 1273 | 懂 1273 1274 | 懈 1274 1275 | 懒 1275 1276 | 懦 1276 1277 | 懵 1277 1278 | 戈 1278 1279 | 戏 1279 1280 | 成 1280 1281 | 我 1281 1282 | 戒 1282 1283 | 或 1283 1284 | 战 1284 1285 | 戚 1285 1286 | 截 1286 1287 | 戳 1287 1288 | 戴 1288 1289 | 户 1289 1290 | 房 1290 1291 | 所 1291 1292 | 扁 1292 1293 | 扇 1293 1294 | 扈 1294 1295 | 扉 1295 1296 | 手 1296 1297 | 才 1297 1298 | 扎 1298 1299 | 扑 1299 1300 | 扒 1300 1301 | 打 1301 1302 | 扔 1302 1303 | 托 1303 1304 | 扛 1304 1305 | 扣 1305 1306 | 执 1306 1307 | 扩 1307 1308 | 扫 1308 1309 | 扬 1309 1310 | 扭 1310 1311 | 扮 1311 1312 | 扯 1312 1313 | 扰 1313 1314 | 扳 1314 1315 | 扶 1315 1316 | 批 1316 1317 | 扼 1317 1318 | 找 1318 1319 | 承 1319 1320 | 技 1320 1321 | 抄 1321 1322 | 抉 1322 1323 | 把 1323 1324 | 抑 1324 1325 | 抒 1325 1326 | 抓 1326 1327 | 投 1327 1328 | 抖 1328 1329 | 抗 1329 1330 | 折 1330 1331 | 抚 1331 1332 | 抛 1332 1333 | 抠 1333 1334 | 抢 1334 1335 | 护 1335 1336 | 报 1336 1337 | 披 1337 1338 | 抬 1338 1339 | 抱 1339 1340 | 抵 1340 1341 | 抹 1341 1342 | 押 1342 1343 | 抽 1343 1344 | 担 1344 1345 | 拆 1345 1346 | 拉 1346 1347 | 拌 1347 1348 | 拍 1348 1349 | 拎 1349 1350 | 拐 1350 1351 | 拒 1351 1352 | 拓 1352 1353 | 拔 1353 1354 | 拖 1354 1355 | 拘 1355 1356 | 拙 1356 1357 | 招 1357 1358 | 拜 1358 1359 | 拟 1359 1360 | 拢 1360 1361 | 拣 1361 1362 | 拥 1362 1363 | 拦 1363 1364 | 拧 1364 1365 | 拨 1365 1366 | 择 1366 1367 | 括 1367 1368 | 拭 1368 1369 | 拮 1369 1370 | 拯 1370 1371 | 拳 1371 1372 | 拴 1372 1373 | 拷 1373 1374 | 拼 1374 1375 | 拽 1375 1376 | 拾 1376 1377 | 拿 1377 1378 | 持 1378 1379 | 挂 1379 1380 | 指 1380 1381 | 按 1381 1382 | 挎 1382 1383 | 挑 1383 1384 | 挖 1384 1385 | 挚 1385 1386 | 挟 1386 1387 | 挠 1387 1388 | 挡 1388 1389 | 挣 1389 1390 | 挤 1390 1391 | 挥 1391 1392 | 挨 1392 1393 | 挪 1393 1394 | 挫 1394 1395 | 振 1395 1396 | 挺 1396 1397 | 挽 1397 1398 | 捂 1398 1399 | 捅 1399 1400 | 捆 1400 1401 | 捉 1401 1402 | 捍 1402 1403 | 捎 1403 1404 | 捏 1404 1405 | 捐 1405 1406 | 捕 1406 1407 | 捞 1407 1408 | 损 1408 1409 | 捡 1409 1410 | 换 1410 1411 | 捣 1411 1412 | 捧 1412 1413 | 据 1413 1414 | 捱 1414 1415 | 捶 1415 1416 | 捷 1416 1417 | 掀 1417 1418 | 授 1418 1419 | 掉 1419 1420 | 掌 1420 1421 | 掏 1421 1422 | 掐 1422 1423 | 排 1423 1424 | 掘 1424 1425 | 掠 1425 1426 | 探 1426 1427 | 接 1427 1428 | 控 1428 1429 | 推 1429 1430 | 掩 1430 1431 | 措 1431 1432 | 掰 1432 1433 | 掷 1433 1434 | 掺 1434 1435 | 揉 1435 1436 | 揍 1436 1437 | 描 1437 1438 | 提 1438 1439 | 插 1439 1440 | 握 1440 1441 | 揣 1441 1442 | 揪 1442 1443 | 揭 1443 1444 | 援 1444 1445 | 揽 1445 1446 | 搁 1446 1447 | 搂 1447 1448 | 搅 1448 1449 | 搏 1449 1450 | 搓 1450 1451 | 搛 1451 1452 | 搜 1452 1453 | 搞 1453 1454 | 搪 1454 1455 | 搬 1455 1456 | 搭 1456 1457 | 携 1457 1458 | 搽 1458 1459 | 摁 1459 1460 | 摄 1460 1461 | 摆 1461 1462 | 摇 1462 1463 | 摈 1463 1464 | 摊 1464 1465 | 摔 1465 1466 | 摘 1466 1467 | 摞 1467 1468 | 摧 1468 1469 | 摩 1469 1470 | 摸 1470 1471 | 撂 1471 1472 | 撇 1472 1473 | 撑 1473 1474 | 撒 1474 1475 | 撕 1475 1476 | 撞 1476 1477 | 撤 1477 1478 | 撩 1478 1479 | 撬 1479 1480 | 播 1480 1481 | 撮 1481 1482 | 撰 1482 1483 | 撵 1483 1484 | 撼 1484 1485 | 擀 1485 1486 | 擅 1486 1487 | 操 1487 1488 | 擦 1488 1489 | 
攀 1489 1490 | 攒 1490 1491 | 攥 1491 1492 | 支 1492 1493 | 收 1493 1494 | 改 1494 1495 | 攻 1495 1496 | 放 1496 1497 | 政 1497 1498 | 故 1498 1499 | 效 1499 1500 | 敌 1500 1501 | 敏 1501 1502 | 救 1502 1503 | 教 1503 1504 | 敛 1504 1505 | 敞 1505 1506 | 敢 1506 1507 | 散 1507 1508 | 敦 1508 1509 | 敬 1509 1510 | 数 1510 1511 | 敲 1511 1512 | 整 1512 1513 | 敷 1513 1514 | 文 1514 1515 | 斋 1515 1516 | 斌 1516 1517 | 斐 1517 1518 | 斑 1518 1519 | 斓 1519 1520 | 斗 1520 1521 | 料 1521 1522 | 斜 1522 1523 | 斤 1523 1524 | 斥 1524 1525 | 斧 1525 1526 | 斩 1526 1527 | 断 1527 1528 | 斯 1528 1529 | 新 1529 1530 | 方 1530 1531 | 施 1531 1532 | 旁 1532 1533 | 旅 1533 1534 | 旋 1534 1535 | 旎 1535 1536 | 族 1536 1537 | 旖 1537 1538 | 旗 1538 1539 | 无 1539 1540 | 既 1540 1541 | 日 1541 1542 | 旦 1542 1543 | 旧 1543 1544 | 旨 1544 1545 | 早 1545 1546 | 旬 1546 1547 | 旭 1547 1548 | 旮 1548 1549 | 旯 1549 1550 | 旱 1550 1551 | 时 1551 1552 | 旷 1552 1553 | 旺 1553 1554 | 昂 1554 1555 | 昆 1555 1556 | 昊 1556 1557 | 昌 1557 1558 | 明 1558 1559 | 昏 1559 1560 | 易 1560 1561 | 昔 1561 1562 | 昕 1562 1563 | 昙 1563 1564 | 星 1564 1565 | 映 1565 1566 | 春 1566 1567 | 昧 1567 1568 | 昨 1568 1569 | 昭 1569 1570 | 是 1570 1571 | 昵 1571 1572 | 昼 1572 1573 | 显 1573 1574 | 晃 1574 1575 | 晋 1575 1576 | 晏 1576 1577 | 晒 1577 1578 | 晓 1578 1579 | 晕 1579 1580 | 晚 1580 1581 | 晦 1581 1582 | 晨 1582 1583 | 普 1583 1584 | 景 1584 1585 | 晰 1585 1586 | 晴 1586 1587 | 晶 1587 1588 | 智 1588 1589 | 晾 1589 1590 | 暂 1590 1591 | 暇 1591 1592 | 暑 1592 1593 | 暖 1593 1594 | 暗 1594 1595 | 暧 1595 1596 | 暴 1596 1597 | 曲 1597 1598 | 更 1598 1599 | 曹 1599 1600 | 曼 1600 1601 | 曾 1601 1602 | 替 1602 1603 | 最 1603 1604 | 月 1604 1605 | 有 1605 1606 | 朋 1606 1607 | 服 1607 1608 | 朔 1608 1609 | 朗 1609 1610 | 望 1610 1611 | 朝 1611 1612 | 期 1612 1613 | 朦 1613 1614 | 木 1614 1615 | 未 1615 1616 | 末 1616 1617 | 本 1617 1618 | 术 1618 1619 | 朱 1619 1620 | 朴 1620 1621 | 朵 1621 1622 | 机 1622 1623 | 杀 1623 1624 | 杂 1624 1625 | 权 1625 1626 | 杆 1626 1627 | 杉 1627 1628 | 李 1628 1629 | 杏 1629 1630 | 材 1630 1631 | 村 1631 1632 | 杖 1632 1633 | 杜 1633 1634 | 杞 1634 1635 | 束 1635 1636 | 杠 1636 1637 | 条 1637 1638 | 来 1638 1639 | 杨 1639 1640 | 杭 1640 1641 | 杯 1641 1642 | 杰 1642 1643 | 杳 1643 1644 | 杵 1644 1645 | 松 1645 1646 | 板 1646 1647 | 极 1647 1648 | 构 1648 1649 | 枉 1649 1650 | 析 1650 1651 | 枕 1651 1652 | 林 1652 1653 | 枚 1653 1654 | 果 1654 1655 | 枝 1655 1656 | 枢 1656 1657 | 枣 1657 1658 | 枪 1658 1659 | 枫 1659 1660 | 枭 1660 1661 | 枯 1661 1662 | 架 1662 1663 | 枷 1663 1664 | 柄 1664 1665 | 柏 1665 1666 | 某 1666 1667 | 染 1667 1668 | 柔 1668 1669 | 柚 1669 1670 | 柜 1670 1671 | 柠 1671 1672 | 查 1672 1673 | 柯 1673 1674 | 柱 1674 1675 | 柳 1675 1676 | 柴 1676 1677 | 柿 1677 1678 | 栀 1678 1679 | 栅 1679 1680 | 标 1680 1681 | 栈 1681 1682 | 栋 1682 1683 | 栏 1683 1684 | 树 1684 1685 | 栓 1685 1686 | 栖 1686 1687 | 栗 1687 1688 | 校 1688 1689 | 株 1689 1690 | 样 1690 1691 | 核 1691 1692 | 根 1692 1693 | 格 1693 1694 | 栽 1694 1695 | 桂 1695 1696 | 桃 1696 1697 | 框 1697 1698 | 案 1698 1699 | 桌 1699 1700 | 桐 1700 1701 | 桑 1701 1702 | 桔 1702 1703 | 桢 1703 1704 | 档 1704 1705 | 桥 1705 1706 | 桦 1706 1707 | 桨 1707 1708 | 桩 1708 1709 | 桶 1709 1710 | 梁 1710 1711 | 梅 1711 1712 | 梗 1712 1713 | 梦 1713 1714 | 梨 1714 1715 | 梭 1715 1716 | 梯 1716 1717 | 械 1717 1718 | 梳 1718 1719 | 检 1719 1720 | 棉 1720 1721 | 棋 1721 1722 | 棍 1722 1723 | 棒 1723 1724 | 棘 1724 1725 | 棚 1725 1726 | 棠 1726 1727 | 棣 1727 1728 | 森 1728 1729 | 棱 1729 1730 | 棵 1730 1731 | 椅 1731 1732 | 植 1732 1733 | 椎 1733 1734 | 椒 1734 1735 | 椭 1735 1736 | 椰 1736 1737 | 楂 1737 1738 | 楚 1738 1739 | 楠 1739 1740 | 楼 1740 1741 | 概 1741 1742 | 榄 1742 1743 
| 榆 1743 1744 | 榕 1744 1745 | 榜 1745 1746 | 榨 1746 1747 | 榴 1747 1748 | 槐 1748 1749 | 槛 1749 1750 | 槟 1750 1751 | 槽 1751 1752 | 樊 1752 1753 | 樟 1753 1754 | 模 1754 1755 | 横 1755 1756 | 樱 1756 1757 | 橄 1757 1758 | 橇 1758 1759 | 橙 1759 1760 | 橱 1760 1761 | 檐 1761 1762 | 檞 1762 1763 | 檬 1763 1764 | 欠 1764 1765 | 次 1765 1766 | 欢 1766 1767 | 欣 1767 1768 | 欧 1768 1769 | 欲 1769 1770 | 欺 1770 1771 | 款 1771 1772 | 歇 1772 1773 | 歉 1773 1774 | 歌 1774 1775 | 止 1775 1776 | 正 1776 1777 | 此 1777 1778 | 步 1778 1779 | 武 1779 1780 | 歧 1780 1781 | 歪 1781 1782 | 歹 1782 1783 | 死 1783 1784 | 殊 1784 1785 | 残 1785 1786 | 殖 1786 1787 | 殴 1787 1788 | 段 1788 1789 | 殷 1789 1790 | 殿 1790 1791 | 毁 1791 1792 | 毅 1792 1793 | 母 1793 1794 | 每 1794 1795 | 毒 1795 1796 | 比 1796 1797 | 毕 1797 1798 | 毙 1798 1799 | 毛 1799 1800 | 毡 1800 1801 | 毫 1801 1802 | 毯 1802 1803 | 氏 1803 1804 | 民 1804 1805 | 氓 1805 1806 | 气 1806 1807 | 氛 1807 1808 | 氧 1808 1809 | 氮 1809 1810 | 水 1810 1811 | 永 1811 1812 | 汁 1812 1813 | 求 1813 1814 | 汇 1814 1815 | 汉 1815 1816 | 汗 1816 1817 | 江 1817 1818 | 池 1818 1819 | 污 1819 1820 | 汤 1820 1821 | 汪 1821 1822 | 汰 1822 1823 | 汹 1823 1824 | 汽 1824 1825 | 沃 1825 1826 | 沈 1826 1827 | 沉 1827 1828 | 沐 1828 1829 | 沓 1829 1830 | 沙 1830 1831 | 沛 1831 1832 | 沟 1832 1833 | 没 1833 1834 | 沥 1834 1835 | 沦 1835 1836 | 沧 1836 1837 | 沫 1837 1838 | 沮 1838 1839 | 河 1839 1840 | 沸 1840 1841 | 油 1841 1842 | 治 1842 1843 | 沼 1843 1844 | 沾 1844 1845 | 沿 1845 1846 | 泄 1846 1847 | 泉 1847 1848 | 泊 1848 1849 | 泌 1849 1850 | 法 1850 1851 | 泛 1851 1852 | 泞 1852 1853 | 泡 1853 1854 | 波 1854 1855 | 泥 1855 1856 | 注 1856 1857 | 泪 1857 1858 | 泯 1858 1859 | 泰 1859 1860 | 泱 1860 1861 | 泳 1861 1862 | 泸 1862 1863 | 泼 1863 1864 | 泽 1864 1865 | 洁 1865 1866 | 洋 1866 1867 | 洒 1867 1868 | 洗 1868 1869 | 洛 1869 1870 | 洞 1870 1871 | 津 1871 1872 | 洪 1872 1873 | 洲 1873 1874 | 活 1874 1875 | 洽 1875 1876 | 派 1876 1877 | 流 1877 1878 | 浅 1878 1879 | 浆 1879 1880 | 浇 1880 1881 | 浊 1881 1882 | 测 1882 1883 | 浍 1883 1884 | 济 1884 1885 | 浏 1885 1886 | 浑 1886 1887 | 浒 1887 1888 | 浓 1888 1889 | 浙 1889 1890 | 浜 1890 1891 | 浦 1891 1892 | 浩 1892 1893 | 浪 1893 1894 | 浮 1894 1895 | 浴 1895 1896 | 海 1896 1897 | 浸 1897 1898 | 涂 1898 1899 | 涅 1899 1900 | 消 1900 1901 | 涉 1901 1902 | 涌 1902 1903 | 涎 1903 1904 | 涕 1904 1905 | 涛 1905 1906 | 涝 1906 1907 | 涡 1907 1908 | 涣 1908 1909 | 润 1909 1910 | 涨 1910 1911 | 涮 1911 1912 | 涯 1912 1913 | 液 1913 1914 | 涵 1914 1915 | 淀 1915 1916 | 淆 1916 1917 | 淇 1917 1918 | 淋 1918 1919 | 淌 1919 1920 | 淑 1920 1921 | 淘 1921 1922 | 淞 1922 1923 | 淡 1923 1924 | 淫 1924 1925 | 淮 1925 1926 | 深 1926 1927 | 淳 1927 1928 | 混 1928 1929 | 淹 1929 1930 | 添 1930 1931 | 清 1931 1932 | 渊 1932 1933 | 渎 1933 1934 | 渐 1934 1935 | 渔 1935 1936 | 渗 1936 1937 | 渝 1937 1938 | 渠 1938 1939 | 渡 1939 1940 | 渣 1940 1941 | 渤 1941 1942 | 温 1942 1943 | 港 1943 1944 | 渲 1944 1945 | 渴 1945 1946 | 游 1946 1947 | 渺 1947 1948 | 湃 1948 1949 | 湖 1949 1950 | 湘 1950 1951 | 湛 1951 1952 | 湾 1952 1953 | 湿 1953 1954 | 溃 1954 1955 | 溅 1955 1956 | 源 1956 1957 | 溜 1957 1958 | 溢 1958 1959 | 溥 1959 1960 | 溪 1960 1961 | 溶 1961 1962 | 溺 1962 1963 | 滁 1963 1964 | 滋 1964 1965 | 滑 1965 1966 | 滔 1966 1967 | 滕 1967 1968 | 滚 1968 1969 | 滞 1969 1970 | 满 1970 1971 | 滤 1971 1972 | 滥 1972 1973 | 滨 1973 1974 | 滩 1974 1975 | 滴 1975 1976 | 漂 1976 1977 | 漆 1977 1978 | 漏 1978 1979 | 漓 1979 1980 | 演 1980 1981 | 漠 1981 1982 | 漫 1982 1983 | 漳 1983 1984 | 潆 1984 1985 | 潇 1985 1986 | 潍 1986 1987 | 潘 1987 1988 | 潜 1988 1989 | 潞 1989 1990 | 潢 1990 1991 | 潭 1991 1992 | 潮 1992 1993 | 潸 1993 1994 | 潼 1994 1995 | 澄 1995 1996 | 澈 1996 
1997 | 澎 1997 1998 | 澜 1998 1999 | 澡 1999 2000 | 澳 2000 2001 | 激 2001 2002 | 濛 2002 2003 | 濡 2003 2004 | 濮 2004 2005 | 瀑 2005 2006 | 灌 2006 2007 | 火 2007 2008 | 灭 2008 2009 | 灯 2009 2010 | 灰 2010 2011 | 灵 2011 2012 | 灶 2012 2013 | 灼 2013 2014 | 灾 2014 2015 | 灿 2015 2016 | 炀 2016 2017 | 炉 2017 2018 | 炊 2018 2019 | 炎 2019 2020 | 炒 2020 2021 | 炕 2021 2022 | 炖 2022 2023 | 炝 2023 2024 | 炫 2024 2025 | 炬 2025 2026 | 炭 2026 2027 | 炮 2027 2028 | 炯 2028 2029 | 炳 2029 2030 | 炸 2030 2031 | 点 2031 2032 | 炼 2032 2033 | 烁 2033 2034 | 烂 2034 2035 | 烈 2035 2036 | 烘 2036 2037 | 烙 2037 2038 | 烛 2038 2039 | 烟 2039 2040 | 烤 2040 2041 | 烦 2041 2042 | 烧 2042 2043 | 烨 2043 2044 | 烩 2044 2045 | 烫 2045 2046 | 热 2046 2047 | 烹 2047 2048 | 焉 2048 2049 | 焊 2049 2050 | 焕 2050 2051 | 焖 2051 2052 | 焚 2052 2053 | 焦 2053 2054 | 焰 2054 2055 | 然 2055 2056 | 煅 2056 2057 | 煌 2057 2058 | 煎 2058 2059 | 煜 2059 2060 | 煞 2060 2061 | 煤 2061 2062 | 照 2062 2063 | 煨 2063 2064 | 煮 2064 2065 | 煲 2065 2066 | 煸 2066 2067 | 煽 2067 2068 | 熄 2068 2069 | 熊 2069 2070 | 熏 2070 2071 | 熔 2071 2072 | 熘 2072 2073 | 熙 2073 2074 | 熟 2074 2075 | 熬 2075 2076 | 燃 2076 2077 | 燎 2077 2078 | 燕 2078 2079 | 燥 2079 2080 | 爆 2080 2081 | 爪 2081 2082 | 爬 2082 2083 | 爱 2083 2084 | 爵 2084 2085 | 父 2085 2086 | 爷 2086 2087 | 爸 2087 2088 | 爹 2088 2089 | 爽 2089 2090 | 片 2090 2091 | 版 2091 2092 | 牌 2092 2093 | 牙 2093 2094 | 牛 2094 2095 | 牡 2095 2096 | 牢 2096 2097 | 牧 2097 2098 | 物 2098 2099 | 牲 2099 2100 | 牵 2100 2101 | 特 2101 2102 | 牺 2102 2103 | 犀 2103 2104 | 犄 2104 2105 | 犊 2105 2106 | 犟 2106 2107 | 犯 2107 2108 | 状 2108 2109 | 犷 2109 2110 | 犹 2110 2111 | 狂 2111 2112 | 狄 2112 2113 | 狈 2113 2114 | 狐 2114 2115 | 狗 2115 2116 | 狙 2116 2117 | 狞 2117 2118 | 狠 2118 2119 | 狡 2119 2120 | 独 2120 2121 | 狭 2121 2122 | 狮 2122 2123 | 狱 2123 2124 | 狸 2124 2125 | 狼 2125 2126 | 猎 2126 2127 | 猕 2127 2128 | 猖 2128 2129 | 猛 2129 2130 | 猜 2130 2131 | 猪 2131 2132 | 猫 2132 2133 | 猬 2133 2134 | 献 2134 2135 | 猴 2135 2136 | 獗 2136 2137 | 玄 2137 2138 | 率 2138 2139 | 玉 2139 2140 | 王 2140 2141 | 玛 2141 2142 | 玟 2142 2143 | 玩 2143 2144 | 玫 2144 2145 | 玮 2145 2146 | 环 2146 2147 | 现 2147 2148 | 玲 2148 2149 | 玻 2149 2150 | 珀 2150 2151 | 珈 2151 2152 | 珊 2152 2153 | 珍 2153 2154 | 珑 2154 2155 | 珠 2155 2156 | 班 2156 2157 | 球 2157 2158 | 理 2158 2159 | 琐 2159 2160 | 琢 2160 2161 | 琥 2161 2162 | 琦 2162 2163 | 琪 2163 2164 | 琳 2164 2165 | 琴 2165 2166 | 琵 2166 2167 | 琶 2167 2168 | 琼 2168 2169 | 瑕 2169 2170 | 瑜 2170 2171 | 瑞 2171 2172 | 瑟 2172 2173 | 瑰 2173 2174 | 瑶 2174 2175 | 璃 2175 2176 | 璇 2176 2177 | 璋 2177 2178 | 璐 2178 2179 | 璜 2179 2180 | 璧 2180 2181 | 瓜 2181 2182 | 瓣 2182 2183 | 瓦 2183 2184 | 瓶 2184 2185 | 瓷 2185 2186 | 甄 2186 2187 | 甑 2187 2188 | 甘 2188 2189 | 甚 2189 2190 | 甜 2190 2191 | 生 2191 2192 | 甥 2192 2193 | 用 2193 2194 | 甩 2194 2195 | 甫 2195 2196 | 甭 2196 2197 | 田 2197 2198 | 由 2198 2199 | 甲 2199 2200 | 申 2200 2201 | 电 2201 2202 | 男 2202 2203 | 画 2203 2204 | 畅 2204 2205 | 界 2205 2206 | 畏 2206 2207 | 畔 2207 2208 | 留 2208 2209 | 略 2209 2210 | 番 2210 2211 | 畴 2211 2212 | 畸 2212 2213 | 疆 2213 2214 | 疏 2214 2215 | 疑 2215 2216 | 疗 2216 2217 | 疙 2217 2218 | 疚 2218 2219 | 疡 2219 2220 | 疤 2220 2221 | 疫 2221 2222 | 疯 2222 2223 | 疲 2223 2224 | 疵 2224 2225 | 疼 2225 2226 | 疾 2226 2227 | 病 2227 2228 | 症 2228 2229 | 痒 2229 2230 | 痕 2230 2231 | 痘 2231 2232 | 痛 2232 2233 | 痞 2233 2234 | 痣 2234 2235 | 痰 2235 2236 | 痴 2236 2237 | 痿 2237 2238 | 瘠 2238 2239 | 瘤 2239 2240 | 瘦 2240 2241 | 瘩 2241 2242 | 瘴 2242 2243 | 瘾 2243 2244 | 癌 2244 2245 | 癖 2245 2246 | 癫 2246 2247 | 登 2247 2248 | 白 2248 2249 | 百 2249 2250 | 皂 
2250 2251 | 的 2251 2252 | 皆 2252 2253 | 皇 2253 2254 | 皑 2254 2255 | 皓 2255 2256 | 皖 2256 2257 | 皮 2257 2258 | 皱 2258 2259 | 盆 2259 2260 | 盈 2260 2261 | 益 2261 2262 | 盐 2262 2263 | 监 2263 2264 | 盒 2264 2265 | 盔 2265 2266 | 盖 2266 2267 | 盗 2267 2268 | 盘 2268 2269 | 盛 2269 2270 | 盟 2270 2271 | 目 2271 2272 | 盯 2272 2273 | 盲 2273 2274 | 直 2274 2275 | 相 2275 2276 | 盼 2276 2277 | 盾 2277 2278 | 省 2278 2279 | 眉 2279 2280 | 看 2280 2281 | 真 2281 2282 | 眠 2282 2283 | 眨 2283 2284 | 眩 2284 2285 | 眯 2285 2286 | 眶 2286 2287 | 眷 2287 2288 | 眺 2288 2289 | 眼 2289 2290 | 着 2290 2291 | 睁 2291 2292 | 睐 2292 2293 | 睛 2293 2294 | 睡 2294 2295 | 督 2295 2296 | 睦 2296 2297 | 睫 2297 2298 | 睹 2298 2299 | 瞄 2299 2300 | 瞅 2300 2301 | 瞌 2301 2302 | 瞎 2302 2303 | 瞒 2303 2304 | 瞟 2304 2305 | 瞧 2305 2306 | 瞩 2306 2307 | 瞪 2307 2308 | 瞬 2308 2309 | 瞻 2309 2310 | 瞿 2310 2311 | 矛 2311 2312 | 矜 2312 2313 | 矢 2313 2314 | 矣 2314 2315 | 知 2315 2316 | 矩 2316 2317 | 矫 2317 2318 | 短 2318 2319 | 矮 2319 2320 | 石 2320 2321 | 矶 2321 2322 | 矿 2322 2323 | 码 2323 2324 | 砂 2324 2325 | 砌 2325 2326 | 砍 2326 2327 | 砒 2327 2328 | 研 2328 2329 | 砖 2329 2330 | 砣 2330 2331 | 破 2331 2332 | 砸 2332 2333 | 础 2333 2334 | 硅 2334 2335 | 硕 2335 2336 | 硝 2336 2337 | 硫 2337 2338 | 硬 2338 2339 | 确 2339 2340 | 碌 2340 2341 | 碍 2341 2342 | 碎 2342 2343 | 碑 2343 2344 | 碗 2344 2345 | 碟 2345 2346 | 碧 2346 2347 | 碰 2347 2348 | 碳 2348 2349 | 碴 2349 2350 | 磁 2350 2351 | 磅 2351 2352 | 磊 2352 2353 | 磋 2353 2354 | 磕 2354 2355 | 磨 2355 2356 | 磷 2356 2357 | 礁 2357 2358 | 礴 2358 2359 | 示 2359 2360 | 礼 2360 2361 | 社 2361 2362 | 祁 2362 2363 | 祈 2363 2364 | 祖 2364 2365 | 祜 2365 2366 | 祝 2366 2367 | 神 2367 2368 | 祠 2368 2369 | 祥 2369 2370 | 票 2370 2371 | 祭 2371 2372 | 祷 2372 2373 | 祸 2373 2374 | 禁 2374 2375 | 禅 2375 2376 | 福 2376 2377 | 禧 2377 2378 | 禹 2378 2379 | 离 2379 2380 | 禾 2380 2381 | 秀 2381 2382 | 私 2382 2383 | 秃 2383 2384 | 秋 2384 2385 | 种 2385 2386 | 科 2386 2387 | 秒 2387 2388 | 秘 2388 2389 | 租 2389 2390 | 秤 2390 2391 | 秦 2391 2392 | 秧 2392 2393 | 秩 2393 2394 | 积 2394 2395 | 称 2395 2396 | 移 2396 2397 | 秽 2397 2398 | 稀 2398 2399 | 程 2399 2400 | 稍 2400 2401 | 税 2401 2402 | 稚 2402 2403 | 稠 2403 2404 | 稣 2404 2405 | 稳 2405 2406 | 稷 2406 2407 | 稻 2407 2408 | 稼 2408 2409 | 稽 2409 2410 | 稿 2410 2411 | 穆 2411 2412 | 穗 2412 2413 | 究 2413 2414 | 穷 2414 2415 | 空 2415 2416 | 穿 2416 2417 | 突 2417 2418 | 窃 2418 2419 | 窄 2419 2420 | 窈 2420 2421 | 窍 2421 2422 | 窑 2422 2423 | 窕 2423 2424 | 窗 2424 2425 | 窘 2425 2426 | 窜 2426 2427 | 窝 2427 2428 | 窟 2428 2429 | 窦 2429 2430 | 窿 2430 2431 | 立 2431 2432 | 竖 2432 2433 | 站 2433 2434 | 竞 2434 2435 | 竟 2435 2436 | 章 2436 2437 | 童 2437 2438 | 竭 2438 2439 | 端 2439 2440 | 竹 2440 2441 | 笈 2441 2442 | 笋 2442 2443 | 笑 2443 2444 | 笔 2444 2445 | 笛 2445 2446 | 符 2446 2447 | 笨 2447 2448 | 第 2448 2449 | 笼 2449 2450 | 等 2450 2451 | 筋 2451 2452 | 筐 2452 2453 | 筑 2453 2454 | 筒 2454 2455 | 答 2455 2456 | 策 2456 2457 | 筛 2457 2458 | 筝 2458 2459 | 筷 2459 2460 | 筹 2460 2461 | 签 2461 2462 | 简 2462 2463 | 箍 2463 2464 | 算 2464 2465 | 管 2465 2466 | 箫 2466 2467 | 箭 2467 2468 | 箱 2468 2469 | 篇 2469 2470 | 篝 2470 2471 | 篡 2471 2472 | 篮 2472 2473 | 篷 2473 2474 | 簧 2474 2475 | 簿 2475 2476 | 籁 2476 2477 | 籍 2477 2478 | 米 2478 2479 | 类 2479 2480 | 籽 2480 2481 | 粉 2481 2482 | 粒 2482 2483 | 粕 2483 2484 | 粗 2484 2485 | 粘 2485 2486 | 粟 2486 2487 | 粤 2487 2488 | 粥 2488 2489 | 粪 2489 2490 | 粮 2490 2491 | 粱 2491 2492 | 粹 2492 2493 | 精 2493 2494 | 糅 2494 2495 | 糊 2495 2496 | 糕 2496 2497 | 糖 2497 2498 | 糙 2498 2499 | 糜 2499 2500 | 糟 2500 2501 | 糯 2501 2502 | 系 2502 2503 | 紊 2503 2504 | 
素 2504 2505 | 索 2505 2506 | 紧 2506 2507 | 紫 2507 2508 | 累 2508 2509 | 絮 2509 2510 | 繁 2510 2511 | 纠 2511 2512 | 红 2512 2513 | 纣 2513 2514 | 纤 2514 2515 | 约 2515 2516 | 级 2516 2517 | 纨 2517 2518 | 纪 2518 2519 | 纬 2519 2520 | 纭 2520 2521 | 纯 2521 2522 | 纱 2522 2523 | 纲 2523 2524 | 纳 2524 2525 | 纵 2525 2526 | 纷 2526 2527 | 纸 2527 2528 | 纹 2528 2529 | 纺 2529 2530 | 纽 2530 2531 | 线 2531 2532 | 练 2532 2533 | 组 2533 2534 | 绅 2534 2535 | 细 2535 2536 | 织 2536 2537 | 终 2537 2538 | 绊 2538 2539 | 绍 2539 2540 | 绎 2540 2541 | 经 2541 2542 | 绑 2542 2543 | 绒 2543 2544 | 结 2544 2545 | 绔 2545 2546 | 绕 2546 2547 | 绘 2547 2548 | 给 2548 2549 | 络 2549 2550 | 绝 2550 2551 | 绞 2551 2552 | 统 2552 2553 | 绢 2553 2554 | 绣 2554 2555 | 继 2555 2556 | 绩 2556 2557 | 绪 2557 2558 | 绫 2558 2559 | 续 2559 2560 | 绮 2560 2561 | 绯 2561 2562 | 绰 2562 2563 | 绳 2563 2564 | 维 2564 2565 | 绵 2565 2566 | 绷 2566 2567 | 绸 2567 2568 | 综 2568 2569 | 绽 2569 2570 | 绿 2570 2571 | 缀 2571 2572 | 缆 2572 2573 | 缇 2573 2574 | 缓 2574 2575 | 编 2575 2576 | 缘 2576 2577 | 缚 2577 2578 | 缝 2578 2579 | 缠 2579 2580 | 缤 2580 2581 | 缩 2581 2582 | 缴 2582 2583 | 缸 2583 2584 | 缺 2584 2585 | 罂 2585 2586 | 罐 2586 2587 | 网 2587 2588 | 罕 2588 2589 | 罗 2589 2590 | 罚 2590 2591 | 罢 2591 2592 | 罩 2592 2593 | 罪 2593 2594 | 置 2594 2595 | 署 2595 2596 | 羁 2596 2597 | 羊 2597 2598 | 美 2598 2599 | 羔 2599 2600 | 羚 2600 2601 | 羞 2601 2602 | 羡 2602 2603 | 群 2603 2604 | 羸 2604 2605 | 羹 2605 2606 | 羽 2606 2607 | 翁 2607 2608 | 翅 2608 2609 | 翔 2609 2610 | 翘 2610 2611 | 翠 2611 2612 | 翡 2612 2613 | 翩 2613 2614 | 翰 2614 2615 | 翱 2615 2616 | 翻 2616 2617 | 翼 2617 2618 | 耀 2618 2619 | 老 2619 2620 | 考 2620 2621 | 者 2621 2622 | 而 2622 2623 | 耍 2623 2624 | 耐 2624 2625 | 耕 2625 2626 | 耗 2626 2627 | 耳 2627 2628 | 耶 2628 2629 | 耸 2629 2630 | 耻 2630 2631 | 耽 2631 2632 | 聂 2632 2633 | 聊 2633 2634 | 聋 2634 2635 | 职 2635 2636 | 联 2636 2637 | 聘 2637 2638 | 聚 2638 2639 | 聪 2639 2640 | 肃 2640 2641 | 肆 2641 2642 | 肇 2642 2643 | 肉 2643 2644 | 肋 2644 2645 | 肌 2645 2646 | 肖 2646 2647 | 肘 2647 2648 | 肚 2648 2649 | 肝 2649 2650 | 肠 2650 2651 | 股 2651 2652 | 肢 2652 2653 | 肤 2653 2654 | 肥 2654 2655 | 肩 2655 2656 | 肪 2656 2657 | 肮 2657 2658 | 肯 2658 2659 | 育 2659 2660 | 肴 2660 2661 | 肺 2661 2662 | 肾 2662 2663 | 肿 2663 2664 | 胀 2664 2665 | 胁 2665 2666 | 胃 2666 2667 | 胆 2667 2668 | 背 2668 2669 | 胎 2669 2670 | 胖 2670 2671 | 胜 2671 2672 | 胞 2672 2673 | 胡 2673 2674 | 胤 2674 2675 | 胧 2675 2676 | 胫 2676 2677 | 胭 2677 2678 | 胳 2678 2679 | 胶 2679 2680 | 胸 2680 2681 | 能 2681 2682 | 脂 2682 2683 | 脆 2683 2684 | 脉 2684 2685 | 脊 2685 2686 | 脏 2686 2687 | 脐 2687 2688 | 脑 2688 2689 | 脖 2689 2690 | 脚 2690 2691 | 脯 2691 2692 | 脱 2692 2693 | 脸 2693 2694 | 脾 2694 2695 | 腆 2695 2696 | 腊 2696 2697 | 腋 2697 2698 | 腌 2698 2699 | 腐 2699 2700 | 腑 2700 2701 | 腓 2701 2702 | 腔 2702 2703 | 腕 2703 2704 | 腥 2704 2705 | 腩 2705 2706 | 腰 2706 2707 | 腹 2707 2708 | 腺 2708 2709 | 腻 2709 2710 | 腼 2710 2711 | 腾 2711 2712 | 腿 2712 2713 | 膀 2713 2714 | 膊 2714 2715 | 膏 2715 2716 | 膜 2716 2717 | 膝 2717 2718 | 膨 2718 2719 | 膳 2719 2720 | 膻 2720 2721 | 臀 2721 2722 | 臂 2722 2723 | 臃 2723 2724 | 臣 2724 2725 | 臧 2725 2726 | 自 2726 2727 | 臭 2727 2728 | 至 2728 2729 | 致 2729 2730 | 舅 2730 2731 | 舆 2731 2732 | 舌 2732 2733 | 舍 2733 2734 | 舒 2734 2735 | 舔 2735 2736 | 舜 2736 2737 | 舞 2737 2738 | 舟 2738 2739 | 航 2739 2740 | 般 2740 2741 | 舰 2741 2742 | 舵 2742 2743 | 船 2743 2744 | 艇 2744 2745 | 艘 2745 2746 | 良 2746 2747 | 艰 2747 2748 | 色 2748 2749 | 艳 2749 2750 | 艺 2750 2751 | 艾 2751 2752 | 节 2752 2753 | 芋 2753 2754 | 芒 2754 2755 | 芙 2755 2756 | 芜 2756 2757 | 芝 2757 2758 
| 芦 2758 2759 | 芬 2759 2760 | 芭 2760 2761 | 芮 2761 2762 | 花 2762 2763 | 芳 2763 2764 | 芸 2764 2765 | 芹 2765 2766 | 芽 2766 2767 | 苇 2767 2768 | 苍 2768 2769 | 苏 2769 2770 | 苔 2770 2771 | 苗 2771 2772 | 苛 2772 2773 | 苞 2773 2774 | 苟 2774 2775 | 若 2775 2776 | 苦 2776 2777 | 英 2777 2778 | 苹 2778 2779 | 茂 2779 2780 | 范 2780 2781 | 茄 2781 2782 | 茅 2782 2783 | 茉 2783 2784 | 茜 2784 2785 | 茧 2785 2786 | 茨 2786 2787 | 茫 2787 2788 | 茱 2788 2789 | 茵 2789 2790 | 茶 2790 2791 | 茸 2791 2792 | 茹 2792 2793 | 荆 2793 2794 | 草 2794 2795 | 荐 2795 2796 | 荒 2796 2797 | 荔 2797 2798 | 荞 2798 2799 | 荠 2799 2800 | 荡 2800 2801 | 荣 2801 2802 | 荤 2802 2803 | 荦 2803 2804 | 荫 2804 2805 | 荮 2805 2806 | 药 2806 2807 | 荷 2807 2808 | 荼 2808 2809 | 莉 2809 2810 | 莎 2810 2811 | 莓 2811 2812 | 莞 2812 2813 | 莫 2813 2814 | 莱 2814 2815 | 莲 2815 2816 | 获 2816 2817 | 莹 2817 2818 | 莺 2818 2819 | 菇 2819 2820 | 菊 2820 2821 | 菌 2821 2822 | 菜 2822 2823 | 菠 2823 2824 | 菩 2824 2825 | 菱 2825 2826 | 菲 2826 2827 | 萃 2827 2828 | 萄 2828 2829 | 萌 2829 2830 | 萍 2830 2831 | 萎 2831 2832 | 萝 2832 2833 | 营 2833 2834 | 萧 2834 2835 | 萨 2835 2836 | 萱 2836 2837 | 落 2837 2838 | 著 2838 2839 | 葛 2839 2840 | 葡 2840 2841 | 董 2841 2842 | 葫 2842 2843 | 葬 2843 2844 | 葱 2844 2845 | 葳 2845 2846 | 葵 2846 2847 | 蒂 2847 2848 | 蒋 2848 2849 | 蒙 2849 2850 | 蒜 2850 2851 | 蒲 2851 2852 | 蒸 2852 2853 | 蓄 2853 2854 | 蓉 2854 2855 | 蓓 2855 2856 | 蓝 2856 2857 | 蓬 2857 2858 | 蔓 2858 2859 | 蔗 2859 2860 | 蔚 2860 2861 | 蔡 2861 2862 | 蔬 2862 2863 | 蔷 2863 2864 | 蔼 2864 2865 | 蔽 2865 2866 | 蕃 2866 2867 | 蕉 2867 2868 | 蕊 2868 2869 | 蕙 2869 2870 | 蕴 2870 2871 | 蕾 2871 2872 | 薄 2872 2873 | 薇 2873 2874 | 薛 2874 2875 | 薪 2875 2876 | 薯 2876 2877 | 藉 2877 2878 | 藏 2878 2879 | 藕 2879 2880 | 藤 2880 2881 | 藩 2881 2882 | 蘑 2882 2883 | 蘸 2883 2884 | 虎 2884 2885 | 虏 2885 2886 | 虐 2886 2887 | 虑 2887 2888 | 虔 2888 2889 | 虚 2889 2890 | 虞 2890 2891 | 虫 2891 2892 | 虱 2892 2893 | 虹 2893 2894 | 虻 2894 2895 | 虽 2895 2896 | 虾 2896 2897 | 蚀 2897 2898 | 蚁 2898 2899 | 蚂 2899 2900 | 蚊 2900 2901 | 蚌 2901 2902 | 蚓 2902 2903 | 蚕 2903 2904 | 蚯 2904 2905 | 蛀 2905 2906 | 蛆 2906 2907 | 蛇 2907 2908 | 蛋 2908 2909 | 蛙 2909 2910 | 蛛 2910 2911 | 蛮 2911 2912 | 蜀 2912 2913 | 蜂 2913 2914 | 蜇 2914 2915 | 蜒 2915 2916 | 蜓 2916 2917 | 蜗 2917 2918 | 蜘 2918 2919 | 蜚 2919 2920 | 蜜 2920 2921 | 蜡 2921 2922 | 蜻 2922 2923 | 蝇 2923 2924 | 蝉 2924 2925 | 蝎 2925 2926 | 蝗 2926 2927 | 蝙 2927 2928 | 蝠 2928 2929 | 蝴 2929 2930 | 蝶 2930 2931 | 螃 2931 2932 | 融 2932 2933 | 螺 2933 2934 | 蟹 2934 2935 | 蠢 2935 2936 | 血 2936 2937 | 衅 2937 2938 | 行 2938 2939 | 衍 2939 2940 | 衔 2940 2941 | 街 2941 2942 | 衙 2942 2943 | 衡 2943 2944 | 衣 2944 2945 | 补 2945 2946 | 表 2946 2947 | 衩 2947 2948 | 衫 2948 2949 | 衬 2949 2950 | 衮 2950 2951 | 衰 2951 2952 | 衷 2952 2953 | 袁 2953 2954 | 袂 2954 2955 | 袋 2955 2956 | 袍 2956 2957 | 袖 2957 2958 | 袜 2958 2959 | 袢 2959 2960 | 被 2960 2961 | 袭 2961 2962 | 袱 2962 2963 | 裁 2963 2964 | 裂 2964 2965 | 装 2965 2966 | 裕 2966 2967 | 裘 2967 2968 | 裙 2968 2969 | 裤 2969 2970 | 裨 2970 2971 | 裳 2971 2972 | 裴 2972 2973 | 裸 2973 2974 | 裹 2974 2975 | 褂 2975 2976 | 褒 2976 2977 | 褚 2977 2978 | 褶 2978 2979 | 襄 2979 2980 | 西 2980 2981 | 要 2981 2982 | 覆 2982 2983 | 见 2983 2984 | 观 2984 2985 | 规 2985 2986 | 觅 2986 2987 | 视 2987 2988 | 览 2988 2989 | 觉 2989 2990 | 角 2990 2991 | 解 2991 2992 | 触 2992 2993 | 言 2993 2994 | 詹 2994 2995 | 誉 2995 2996 | 誓 2996 2997 | 警 2997 2998 | 譬 2998 2999 | 计 2999 3000 | 订 3000 3001 | 认 3001 3002 | 讨 3002 3003 | 让 3003 3004 | 训 3004 3005 | 议 3005 3006 | 讯 3006 3007 | 记 3007 3008 | 讲 3008 3009 | 讳 3009 3010 | 讶 3010 3011 | 讷 3011 
3012 | 许 3012 3013 | 讹 3013 3014 | 论 3014 3015 | 讽 3015 3016 | 设 3016 3017 | 访 3017 3018 | 诀 3018 3019 | 证 3019 3020 | 评 3020 3021 | 识 3021 3022 | 诈 3022 3023 | 诉 3023 3024 | 词 3024 3025 | 诏 3025 3026 | 译 3026 3027 | 试 3027 3028 | 诗 3028 3029 | 诙 3029 3030 | 诚 3030 3031 | 诛 3031 3032 | 话 3032 3033 | 诞 3033 3034 | 询 3034 3035 | 诣 3035 3036 | 该 3036 3037 | 详 3037 3038 | 诧 3038 3039 | 诬 3039 3040 | 语 3040 3041 | 误 3041 3042 | 诱 3042 3043 | 说 3043 3044 | 诵 3044 3045 | 诶 3045 3046 | 请 3046 3047 | 诸 3047 3048 | 诺 3048 3049 | 读 3049 3050 | 诽 3050 3051 | 课 3051 3052 | 谁 3052 3053 | 调 3053 3054 | 谅 3054 3055 | 谆 3055 3056 | 谈 3056 3057 | 谊 3057 3058 | 谋 3058 3059 | 谍 3059 3060 | 谎 3060 3061 | 谏 3061 3062 | 谐 3062 3063 | 谓 3063 3064 | 谕 3064 3065 | 谚 3065 3066 | 谛 3066 3067 | 谜 3067 3068 | 谢 3068 3069 | 谣 3069 3070 | 谤 3070 3071 | 谦 3071 3072 | 谨 3072 3073 | 谬 3073 3074 | 谭 3074 3075 | 谱 3075 3076 | 谴 3076 3077 | 谷 3077 3078 | 豁 3078 3079 | 豆 3079 3080 | 豇 3080 3081 | 豚 3081 3082 | 象 3082 3083 | 豪 3083 3084 | 豫 3084 3085 | 豹 3085 3086 | 豺 3086 3087 | 貂 3087 3088 | 貌 3088 3089 | 贝 3089 3090 | 贞 3090 3091 | 负 3091 3092 | 贡 3092 3093 | 财 3093 3094 | 责 3094 3095 | 贤 3095 3096 | 败 3096 3097 | 货 3097 3098 | 质 3098 3099 | 贩 3099 3100 | 贪 3100 3101 | 贫 3101 3102 | 贬 3102 3103 | 购 3103 3104 | 贯 3104 3105 | 贱 3105 3106 | 贴 3106 3107 | 贵 3107 3108 | 贷 3108 3109 | 贸 3109 3110 | 费 3110 3111 | 贺 3111 3112 | 贼 3112 3113 | 贾 3113 3114 | 贿 3114 3115 | 赂 3115 3116 | 资 3116 3117 | 赋 3117 3118 | 赌 3118 3119 | 赎 3119 3120 | 赏 3120 3121 | 赐 3121 3122 | 赔 3122 3123 | 赖 3123 3124 | 赘 3124 3125 | 赚 3125 3126 | 赛 3126 3127 | 赞 3127 3128 | 赠 3128 3129 | 赡 3129 3130 | 赢 3130 3131 | 赤 3131 3132 | 赦 3132 3133 | 赫 3133 3134 | 走 3134 3135 | 赴 3135 3136 | 赵 3136 3137 | 赶 3137 3138 | 起 3138 3139 | 趁 3139 3140 | 超 3140 3141 | 越 3141 3142 | 趋 3142 3143 | 趟 3143 3144 | 趣 3144 3145 | 足 3145 3146 | 趴 3146 3147 | 趾 3147 3148 | 跃 3148 3149 | 跆 3149 3150 | 跋 3150 3151 | 跌 3151 3152 | 跎 3152 3153 | 跑 3153 3154 | 距 3154 3155 | 跟 3155 3156 | 跤 3156 3157 | 跨 3157 3158 | 跪 3158 3159 | 路 3159 3160 | 跳 3160 3161 | 践 3161 3162 | 跷 3162 3163 | 跺 3163 3164 | 踏 3164 3165 | 踢 3165 3166 | 踩 3166 3167 | 踪 3167 3168 | 踮 3168 3169 | 踹 3169 3170 | 蹄 3170 3171 | 蹈 3171 3172 | 蹉 3172 3173 | 蹋 3173 3174 | 蹦 3174 3175 | 蹬 3175 3176 | 蹭 3176 3177 | 蹲 3177 3178 | 蹿 3178 3179 | 躁 3179 3180 | 身 3180 3181 | 躯 3181 3182 | 躲 3182 3183 | 躺 3183 3184 | 车 3184 3185 | 轧 3185 3186 | 轨 3186 3187 | 轩 3187 3188 | 转 3188 3189 | 轭 3189 3190 | 轮 3190 3191 | 软 3191 3192 | 轰 3192 3193 | 轱 3193 3194 | 轻 3194 3195 | 载 3195 3196 | 轿 3196 3197 | 较 3197 3198 | 辄 3198 3199 | 辅 3199 3200 | 辆 3200 3201 | 辈 3201 3202 | 辉 3202 3203 | 辐 3203 3204 | 辑 3204 3205 | 输 3205 3206 | 辘 3206 3207 | 辙 3207 3208 | 辛 3208 3209 | 辜 3209 3210 | 辞 3210 3211 | 辟 3211 3212 | 辣 3212 3213 | 辨 3213 3214 | 辩 3214 3215 | 辫 3215 3216 | 辰 3216 3217 | 辱 3217 3218 | 边 3218 3219 | 辽 3219 3220 | 达 3220 3221 | 迁 3221 3222 | 迄 3222 3223 | 迅 3223 3224 | 过 3224 3225 | 迈 3225 3226 | 迎 3226 3227 | 运 3227 3228 | 近 3228 3229 | 返 3229 3230 | 还 3230 3231 | 这 3231 3232 | 进 3232 3233 | 远 3233 3234 | 违 3234 3235 | 连 3235 3236 | 迟 3236 3237 | 迦 3237 3238 | 迪 3238 3239 | 迫 3239 3240 | 述 3240 3241 | 迷 3241 3242 | 迹 3242 3243 | 追 3243 3244 | 退 3244 3245 | 送 3245 3246 | 适 3246 3247 | 逃 3247 3248 | 逆 3248 3249 | 选 3249 3250 | 逊 3250 3251 | 逍 3251 3252 | 透 3252 3253 | 逐 3253 3254 | 递 3254 3255 | 途 3255 3256 | 逗 3256 3257 | 通 3257 3258 | 逛 3258 3259 | 逝 3259 3260 | 逞 3260 3261 | 速 3261 3262 | 造 3262 3263 | 逢 3263 3264 | 逮 3264 3265 | 逸 
3265 3266 | 逻 3266 3267 | 逼 3267 3268 | 遇 3268 3269 | 遍 3269 3270 | 遏 3270 3271 | 道 3271 3272 | 遗 3272 3273 | 遛 3273 3274 | 遢 3274 3275 | 遣 3275 3276 | 遥 3276 3277 | 遭 3277 3278 | 遮 3278 3279 | 遵 3279 3280 | 避 3280 3281 | 邀 3281 3282 | 邋 3282 3283 | 邓 3283 3284 | 邢 3284 3285 | 那 3285 3286 | 邦 3286 3287 | 邪 3287 3288 | 邮 3288 3289 | 邯 3289 3290 | 邰 3290 3291 | 邱 3291 3292 | 邳 3292 3293 | 邵 3293 3294 | 邻 3294 3295 | 郁 3295 3296 | 郅 3296 3297 | 郊 3297 3298 | 郎 3298 3299 | 郑 3299 3300 | 郝 3300 3301 | 郡 3301 3302 | 郦 3302 3303 | 部 3303 3304 | 郭 3304 3305 | 都 3305 3306 | 鄙 3306 3307 | 鄱 3307 3308 | 酋 3308 3309 | 配 3309 3310 | 酒 3310 3311 | 酗 3311 3312 | 酝 3312 3313 | 酣 3313 3314 | 酥 3314 3315 | 酬 3315 3316 | 酱 3316 3317 | 酷 3317 3318 | 酸 3318 3319 | 酿 3319 3320 | 醇 3320 3321 | 醉 3321 3322 | 醋 3322 3323 | 醒 3323 3324 | 采 3324 3325 | 释 3325 3326 | 里 3326 3327 | 重 3327 3328 | 野 3328 3329 | 量 3329 3330 | 金 3330 3331 | 釜 3331 3332 | 鉴 3332 3333 | 鑫 3333 3334 | 针 3334 3335 | 钉 3335 3336 | 钓 3336 3337 | 钗 3337 3338 | 钙 3338 3339 | 钛 3339 3340 | 钝 3340 3341 | 钞 3341 3342 | 钟 3342 3343 | 钠 3343 3344 | 钡 3344 3345 | 钢 3345 3346 | 钥 3346 3347 | 钦 3347 3348 | 钩 3348 3349 | 钱 3349 3350 | 钻 3350 3351 | 铁 3351 3352 | 铃 3352 3353 | 铅 3353 3354 | 铎 3354 3355 | 铛 3355 3356 | 铜 3356 3357 | 铭 3357 3358 | 铮 3358 3359 | 铲 3359 3360 | 银 3360 3361 | 铸 3361 3362 | 铺 3362 3363 | 链 3363 3364 | 铿 3364 3365 | 销 3365 3366 | 锁 3366 3367 | 锄 3367 3368 | 锅 3368 3369 | 锈 3369 3370 | 锉 3370 3371 | 锋 3371 3372 | 锌 3372 3373 | 锐 3373 3374 | 锔 3374 3375 | 错 3375 3376 | 锚 3376 3377 | 锡 3377 3378 | 锣 3378 3379 | 锤 3379 3380 | 锦 3380 3381 | 键 3381 3382 | 锵 3382 3383 | 锹 3383 3384 | 锻 3384 3385 | 镀 3385 3386 | 镇 3386 3387 | 镐 3387 3388 | 镑 3388 3389 | 镕 3389 3390 | 镖 3390 3391 | 镜 3391 3392 | 镭 3392 3393 | 镯 3393 3394 | 镶 3394 3395 | 长 3395 3396 | 门 3396 3397 | 闪 3397 3398 | 闭 3398 3399 | 问 3399 3400 | 闯 3400 3401 | 闰 3401 3402 | 闲 3402 3403 | 间 3403 3404 | 闷 3404 3405 | 闸 3405 3406 | 闹 3406 3407 | 闺 3407 3408 | 闻 3408 3409 | 阀 3409 3410 | 阁 3410 3411 | 阂 3411 3412 | 阅 3412 3413 | 阎 3413 3414 | 阐 3414 3415 | 阔 3415 3416 | 阜 3416 3417 | 队 3417 3418 | 阱 3418 3419 | 防 3419 3420 | 阳 3420 3421 | 阴 3421 3422 | 阵 3422 3423 | 阶 3423 3424 | 阻 3424 3425 | 阿 3425 3426 | 陀 3426 3427 | 附 3427 3428 | 际 3428 3429 | 陆 3429 3430 | 陈 3430 3431 | 陋 3431 3432 | 陌 3432 3433 | 降 3433 3434 | 限 3434 3435 | 陕 3435 3436 | 陡 3436 3437 | 院 3437 3438 | 除 3438 3439 | 陨 3439 3440 | 险 3440 3441 | 陪 3441 3442 | 陵 3442 3443 | 陶 3443 3444 | 陷 3444 3445 | 隆 3445 3446 | 隋 3446 3447 | 隍 3447 3448 | 随 3448 3449 | 隐 3449 3450 | 隔 3450 3451 | 隘 3451 3452 | 障 3452 3453 | 隧 3453 3454 | 隶 3454 3455 | 难 3455 3456 | 雀 3456 3457 | 雁 3457 3458 | 雄 3458 3459 | 雅 3459 3460 | 集 3460 3461 | 雇 3461 3462 | 雍 3462 3463 | 雕 3463 3464 | 雨 3464 3465 | 雪 3465 3466 | 雯 3466 3467 | 零 3467 3468 | 雷 3468 3469 | 雾 3469 3470 | 需 3470 3471 | 霄 3471 3472 | 霆 3472 3473 | 震 3473 3474 | 霉 3474 3475 | 霍 3475 3476 | 霎 3476 3477 | 霏 3477 3478 | 霓 3478 3479 | 霜 3479 3480 | 霞 3480 3481 | 露 3481 3482 | 霸 3482 3483 | 青 3483 3484 | 靓 3484 3485 | 靖 3485 3486 | 静 3486 3487 | 非 3487 3488 | 靠 3488 3489 | 靡 3489 3490 | 面 3490 3491 | 革 3491 3492 | 靴 3492 3493 | 靶 3493 3494 | 鞋 3494 3495 | 鞍 3495 3496 | 鞭 3496 3497 | 韦 3497 3498 | 韧 3498 3499 | 韩 3499 3500 | 韭 3500 3501 | 音 3501 3502 | 韵 3502 3503 | 韶 3503 3504 | 页 3504 3505 | 顶 3505 3506 | 项 3506 3507 | 顺 3507 3508 | 须 3508 3509 | 顽 3509 3510 | 顾 3510 3511 | 顿 3511 3512 | 颁 3512 3513 | 颂 3513 3514 | 预 3514 3515 | 领 3515 3516 | 颇 3516 3517 | 颈 3517 3518 | 颊 3518 3519 | 
颐 3519 3520 | 频 3520 3521 | 颓 3521 3522 | 颖 3522 3523 | 颗 3523 3524 | 题 3524 3525 | 颜 3525 3526 | 额 3526 3527 | 颠 3527 3528 | 颤 3528 3529 | 颦 3529 3530 | 风 3530 3531 | 飕 3531 3532 | 飘 3532 3533 | 飙 3533 3534 | 飞 3534 3535 | 食 3535 3536 | 餐 3536 3537 | 餮 3537 3538 | 饥 3538 3539 | 饪 3539 3540 | 饭 3540 3541 | 饮 3541 3542 | 饰 3542 3543 | 饱 3543 3544 | 饶 3544 3545 | 饺 3545 3546 | 饼 3546 3547 | 饿 3547 3548 | 馄 3548 3549 | 馅 3549 3550 | 馆 3550 3551 | 馈 3551 3552 | 馋 3552 3553 | 馍 3553 3554 | 馒 3554 3555 | 首 3555 3556 | 香 3556 3557 | 馨 3557 3558 | 马 3558 3559 | 驭 3559 3560 | 驮 3560 3561 | 驰 3561 3562 | 驱 3562 3563 | 驴 3563 3564 | 驶 3564 3565 | 驸 3565 3566 | 驹 3566 3567 | 驻 3567 3568 | 驼 3568 3569 | 驾 3569 3570 | 驿 3570 3571 | 骂 3571 3572 | 骄 3572 3573 | 骆 3573 3574 | 骋 3574 3575 | 验 3575 3576 | 骏 3576 3577 | 骑 3577 3578 | 骗 3578 3579 | 骚 3579 3580 | 骛 3580 3581 | 骞 3581 3582 | 骤 3582 3583 | 骨 3583 3584 | 骷 3584 3585 | 骸 3585 3586 | 骼 3586 3587 | 髅 3587 3588 | 髓 3588 3589 | 高 3589 3590 | 髦 3590 3591 | 鬼 3591 3592 | 魁 3592 3593 | 魂 3593 3594 | 魄 3594 3595 | 魅 3595 3596 | 魏 3596 3597 | 魔 3597 3598 | 鱼 3598 3599 | 鱿 3599 3600 | 鲁 3600 3601 | 鲍 3601 3602 | 鲜 3602 3603 | 鲤 3603 3604 | 鲨 3604 3605 | 鲫 3605 3606 | 鲸 3606 3607 | 鳅 3607 3608 | 鳌 3608 3609 | 鳖 3609 3610 | 鳝 3610 3611 | 鳞 3611 3612 | 鸟 3612 3613 | 鸡 3613 3614 | 鸣 3614 3615 | 鸦 3615 3616 | 鸭 3616 3617 | 鸯 3617 3618 | 鸳 3618 3619 | 鸽 3619 3620 | 鸿 3620 3621 | 鹅 3621 3622 | 鹏 3622 3623 | 鹤 3623 3624 | 鹰 3624 3625 | 鹿 3625 3626 | 麋 3626 3627 | 麒 3627 3628 | 麟 3628 3629 | 麦 3629 3630 | 麻 3630 3631 | 麽 3631 3632 | 黄 3632 3633 | 黎 3633 3634 | 黏 3634 3635 | 黑 3635 3636 | 默 3636 3637 | 黛 3637 3638 | 黝 3638 3639 | 黟 3639 3640 | 黯 3640 3641 | 鼎 3641 3642 | 鼓 3642 3643 | 鼠 3643 3644 | 鼻 3644 3645 | 齐 3645 3646 | 齿 3646 3647 | 龄 3647 3648 | 龊 3648 3649 | 龌 3649 3650 | 龙 3650 3651 | 龚 3651 3652 | 龟 3652 3653 | + 3653 3654 |
--------------------------------------------------------------------------------
/test/learn_pytorch.py:
--------------------------------------------------------------------------------
# If I'm not sure what some function or class is actually doing, I will write
# snippet code to confirm my understanding.
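# (Added note, not in the original file: the identity checked below is
#  cross_entropy(x, c) = -log_softmax(x)[c] = -x[c] + log(sum_j exp(x[j])),
#  averaged over the samples whose target is not ignore_index.)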

import torch
import torch.nn.functional as F


def learn_cross_entropy():
    IGNORE_ID = -1
    torch.manual_seed(123)

    input = torch.randn(4, 5, requires_grad=True)  # N x C
    target = torch.randint(5, (4,), dtype=torch.int64)  # N
    target[-1] = IGNORE_ID
    print("input:\n", input)
    print("target:\n", target)

    # PART 1: confirm F.cross_entropy() == F.log_softmax() + F.nll_loss()
    # NOTE: reduction='elementwise_mean' is the PyTorch 0.4.x spelling;
    # PyTorch >= 1.0 renamed it to 'mean'.
    ce = F.cross_entropy(
        input, target, ignore_index=IGNORE_ID, reduction='elementwise_mean')
    print("### Using F.cross_entropy()")
    print("ce =", ce)
    ls = F.log_softmax(input, dim=1)
    nll = F.nll_loss(ls, target, ignore_index=IGNORE_ID,
                     reduction='elementwise_mean')
    print("### Using F.log_softmax() + F.nll_loss()")
    print("nll =", nll)
    print("### [CONFIRM] F.cross_entropy() == F.log_softmax() + F.nll_loss()\n")

    # PART 2: confirm log_softmax() == log(softmax())
    print("log_softmax():\n", ls)
    softmax = F.softmax(input, dim=1)
    log_softmax = torch.log(softmax)
    print("softmax():\n", softmax)
    print("log(softmax()):\n", log_softmax)
    print("### [CONFIRM] log_softmax() == log(softmax())\n")

    # PART 3: confirm ignore_index works
    non_ignore_index = target[target != IGNORE_ID]
    print(non_ignore_index)
    print(log_softmax[target != IGNORE_ID])
    loss_each_sample = torch.stack([log_softmax[i][idx]
                                    for i, idx in enumerate(non_ignore_index)],
                                   dim=0)
    print(loss_each_sample)
    print(-1 * torch.mean(loss_each_sample))
    print("### [CONFIRM] ignore_index in F.cross_entropy() works\n")

    # PART 4: confirm cross_entropy()'s backward() works correctly when
    # ignore_index is set.
    # nll = -1/N_valid * sum_i log(softmax(input, dim=1))[i, target[i]]
    #       over the non-ignored samples
    # d_nll / d_input = 1/N_valid * (softmax(input, dim=1) - onehot(target)),
    #       with the rows of ignored samples zeroed out
    print("softmax:\n", softmax)
    print("non ignore softmax:")
    print(softmax[:len(non_ignore_index)])
    print(softmax[range(len(non_ignore_index)), non_ignore_index])
    print("target\n", target)
    # copy, so the manual gradient does not modify softmax in place
    grad = softmax.detach().clone()
    grad[range(len(non_ignore_index)), non_ignore_index] -= 1
    grad /= len(non_ignore_index)
    grad[-1] = 0.0  # IGNORE_ID position
    print("my gradient:\n", grad)
    ce.backward()
    print("pytorch gradient:\n", input.grad)
    print("### [CONFIRM] F.cross_entropy()'s backward() works correctly when "
          "ignore_index is set")


if __name__ == "__main__":
    learn_cross_entropy()
--------------------------------------------------------------------------------
/test/learn_visdom.py:
--------------------------------------------------------------------------------
# INSTALL
# $ pip install visdom
# START
# $ visdom
# or
# $ python -m visdom.server
# then open a browser and visit http://localhost:8097

import torch
import visdom

vis = visdom.Visdom(env="model_1")
vis.text('Hello, world', win='text1')
vis.text('Hi, Kaituo', win='text1', append=True)
for i in range(10):
    vis.line(X=torch.FloatTensor([i]), Y=torch.FloatTensor([i**2]),
             win='loss', update='append' if i > 0 else None)


epochs = 20
loss_result = torch.Tensor(epochs)
for i in range(epochs):
    loss_result[i] = i ** 2
opts = dict(title='LAS', ylabel='loss', xlabel='epoch')
x_axis = torch.arange(1, epochs+1)
y_axis = loss_result[:epochs]
vis2 = visdom.Visdom(env="view_loss")
vis2.line(X=x_axis, Y=y_axis, opts=opts)

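# (Added sketch, not part of the original file: visdom can also draw several
# curves in one window, e.g. train vs. valid loss; the window name, legend,
# and the fake valid curve below are made up for illustration.)
x2 = torch.arange(1, epochs + 1).float()
vis2.line(X=torch.stack([x2, x2], dim=1),
          Y=torch.stack([loss_result, loss_result * 1.1], dim=1),
          win='train_valid_loss',
          opts=dict(title='LAS', xlabel='epoch', ylabel='loss',
                    legend=['train', 'valid']))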
while True:  # keep the process alive so the visdom windows stay up (Ctrl-C to quit)
    continue
--------------------------------------------------------------------------------
/test/path.sh:
--------------------------------------------------------------------------------
export PYTHONPATH=../src/:$PYTHONPATH
--------------------------------------------------------------------------------
/test/test_attention.py:
--------------------------------------------------------------------------------
import torch

from models.attention import DotProductAttention

if __name__ == "__main__":
    torch.manual_seed(123)
    Tos = [1, 5]
    for i in range(len(Tos)):
        print("\n### loop", i)
        N, To, Ti, H = 3, Tos[i], 4, 2
        queries = torch.randn(N, To, H)
        values = torch.randn(N, Ti, H)
        attention = DotProductAttention()
        attention_output, attention_distribution = attention(queries, values)
        print(attention_output.size())
        print(attention_output)
        print(attention_distribution.size())
        print(attention_distribution)
--------------------------------------------------------------------------------
/test/test_data.py:
--------------------------------------------------------------------------------
import json

from data.data import AudioDataset
from data.data import AudioDataLoader


if __name__ == "__main__":
    train_json = "data/data.json"
    batch_size = 2
    max_length_in = 1000
    max_length_out = 1000
    num_batches = 10
    num_workers = 2

    train_dataset = AudioDataset(
        train_json, batch_size, max_length_in, max_length_out, num_batches)
    # NOTE: must set batch_size=1 here.
    train_loader = AudioDataLoader(
        train_dataset, batch_size=1, num_workers=num_workers)

    for i, (data) in enumerate(train_loader):
        inputs, inputs_lens, targets = data
        print(i)
        # print(inputs)
        print(inputs_lens)
        # print(targets)
        print("*" * 20)
--------------------------------------------------------------------------------
/test/test_decoder.py:
--------------------------------------------------------------------------------
import torch

from models.decoder import Decoder
from utils.utils import IGNORE_ID

if __name__ == "__main__":
    torch.manual_seed(123)
    VOC, EMB, SOS, EOS, H, L = 10, 20, 8, 9, 2, 2
    N, To, Ti = 4, 5, 3
    decoder = Decoder(VOC, EMB, SOS, EOS, H, L)
    print(decoder)
    padded_input = torch.randint(10, (N, To), dtype=torch.long)  # N x To
    padded_input[-1, -3:] = IGNORE_ID
    encoder_padded_outputs = torch.randn(N, Ti, H)  # N x Ti x H
    print(padded_input)
    print(padded_input.size())
    print(encoder_padded_outputs)
    print(encoder_padded_outputs.size())
    loss = decoder(
        padded_input, encoder_padded_outputs)
    print(loss)
--------------------------------------------------------------------------------
/test/test_encoder.py:
--------------------------------------------------------------------------------
# Just for learning unittest.
# 1. run `. ./path.sh` first
# 2. run `python -m unittest test_encoder.py`
#    or `python test_encoder.py`
import unittest

import torch

from models.encoder import Encoder


class TestEncoder(unittest.TestCase):

    def setUp(self):
        self.input_size = 8
        self.hidden_size = 32
        self.num_layers = 2
        self.bidirectional = True
        self.rnn_type = 'lstm'
        self.N = 4
        self.T = 10
        self.padded_input = torch.randn(self.N, self.T, self.input_size)
        # NOTE: must specify dtype=torch.int
        self.input_lengths = torch.tensor([self.T] * self.N, dtype=torch.int)
        self.padded_input[-2, -2:, ] = 0
        self.input_lengths[-2] = self.T - 2
        self.padded_input[-1, -3:, ] = 0
        self.input_lengths[-1] = self.T - 3

    def test_forward(self):
        encoder = Encoder(self.input_size, self.hidden_size, self.num_layers,
                          bidirectional=self.bidirectional,
                          rnn_type=self.rnn_type)
        output, hidden = encoder(self.padded_input, self.input_lengths)
        # The original used assertTrue(a, b), which only checks that `a` is
        # truthy (the second argument is treated as a message); assertEqual
        # is what was meant. A bidirectional encoder doubles the output dim
        # (cf. `H = H * 2 if B else H` in test_seq2seq.py).
        out_dim = self.hidden_size * 2 if self.bidirectional \
            else self.hidden_size
        self.assertEqual(output.size(),
                         torch.Size([self.N, self.T, out_dim]))


if __name__ == "__main__":
    # uncomment below for unittest
    # unittest.main()

    # Non-unittest part
    input_size = 8
    hidden_size = 5
    num_layers = 2
    bidirectional = True
    rnn_type = 'lstm'
    N = 4
    T = 10
    padded_input = torch.randn(N, T, input_size)
    input_lengths = torch.tensor([T] * N, dtype=torch.int)
    padded_input[-2, -2:, ] = 0
    input_lengths[-2] = T - 2
    padded_input[-1, -3:, ] = 0
    input_lengths[-1] = T - 3

    print(padded_input)
    print(padded_input.size())
    print(input_lengths)
    print(input_lengths.size())
    encoder = Encoder(input_size, hidden_size, num_layers,
                      bidirectional=bidirectional,
                      rnn_type=rnn_type)
    output, hidden = encoder(padded_input, input_lengths)
    print(output.size())
    print(output)
    print(hidden[0].size())
    print(hidden[0])

    import sys
    sys.exit(0)  # NOTE: everything below is unreachable; delete this line to run it

    # test with data.py
    import json
    from data.data import AudioDataset
    from data.data import AudioDataLoader

    # DATA PART
    train_json = "data/data.json"
    batch_size = 2
    max_length_in = 1000
    max_length_out = 1000
    num_batches = 10
    num_workers = 2

    with open(train_json, 'rb') as f:
        train_json = json.load(f)['utts']

    train_dataset = AudioDataset(
        train_json, batch_size, max_length_in, max_length_out, num_batches)
    # NOTE: must set batch_size=1 here.
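    # (Added note, not in the original file: my reading is that AudioDataset
    # already groups utterances into minibatches (it takes batch_size and
    # num_batches itself), so each dataset item is a complete minibatch and
    # the DataLoader must not batch any further; hence batch_size=1 both here
    # and in test_data.py above.)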
    train_loader = AudioDataLoader(
        train_dataset, batch_size=1, num_workers=num_workers)

    # MODEL PART
    input_size = 83
    hidden_size = 2
    num_layers = 2
    bidirectional = True
    rnn_type = 'lstm'

    encoder = Encoder(input_size, hidden_size, num_layers,
                      bidirectional=bidirectional,
                      rnn_type=rnn_type)
    encoder.cuda()
    for i, (data) in enumerate(train_loader):
        padded_input, input_lengths, targets = data
        padded_input = padded_input.cuda()
        input_lengths = input_lengths.cuda()
        print(i)
        print(padded_input.size())
        print(input_lengths.size())
        output, hidden = encoder(padded_input, input_lengths)
        print(output)
        print(output.size())
        print("*" * 20)
--------------------------------------------------------------------------------
/test/test_seq2seq.py:
--------------------------------------------------------------------------------
import torch

from models.decoder import Decoder
from models.encoder import Encoder
from models.seq2seq import Seq2Seq
from utils.utils import IGNORE_ID

if __name__ == "__main__":
    # Encoder
    D, H, Li, B, R = 8, 2, 2, True, 'lstm'
    N, Ti, To = 4, 10, 5
    padded_input = torch.randn(N, Ti, D)
    input_lengths = torch.tensor([Ti] * N, dtype=torch.int)
    padded_input[-2, -2:, ] = 0
    input_lengths[-2] = Ti - 2
    padded_input[-1, -3:, ] = 0
    input_lengths[-1] = Ti - 3

    encoder = Encoder(D, H, Li,
                      bidirectional=B,
                      rnn_type=R)

    # Decoder
    VOC, EMB, SOS, EOS, L = 10, 3, 8, 9, 2
    H = H * 2 if B else H
    padded_target = torch.randint(10, (N, To), dtype=torch.long)  # N x To
    padded_target[-1, -3:] = IGNORE_ID

    decoder = Decoder(VOC, EMB, SOS, EOS, H, L)

    # Seq2Seq
    seq2seq = Seq2Seq(encoder, decoder)
    loss = seq2seq(padded_input, input_lengths, padded_target)
    print(loss)
    # print(decoder_outputs)
    # print("To+1 =", len(decoder_outputs))
    # print("N, V =", decoder_outputs[0].size())

    import argparse
    beam_size = 5
    nbest = 5
    defaults = dict(beam_size=beam_size,
                    nbest=nbest,
                    decode_max_len=0)
    args = argparse.Namespace(**defaults)
    char_list = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
    for i in range(3):
        print("\n***** Utt", i + 1)
        Ti = i + 20
        input = torch.randn(Ti, D)
        length = torch.tensor([Ti], dtype=torch.int)
        nbest_hyps = seq2seq.recognize(input, length, char_list, args)
--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
KALDI =

.PHONY: all clean

all: kaldi kaldi-io-for-python

kaldi-io-for-python:
	# git clone https://github.com/vesis84/kaldi-io-for-python.git
	tar -zxvf kaldi-io-for-python.tar.gz
	cd ../src/utils; ln -s ../../tools/kaldi-io-for-python/kaldi_io.py

ifneq ($(strip $(KALDI)),)
kaldi:
	ln -s $(KALDI) kaldi
else
kaldi:
	# git clone https://github.com/kaldi-asr/kaldi.git kaldi_github; cd kaldi_github/tools; $(MAKE) all
	# cd kaldi_github/src; ./configure --shared --use-cuda=no; $(MAKE) depend; $(MAKE) all
	# ln -nfs kaldi_github kaldi
endif

clean:
	rm -fr kaldi kaldi-io-for-python ../src/utils/kaldi_io.py
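For reference, the `DotProductAttention` exercised in `test/test_attention.py` above is consistent with the following minimal sketch; this is a reconstruction from the shapes the test checks, not the repository's actual `src/models/attention.py`:

```python
import torch
import torch.nn.functional as F


class DotProductAttention(torch.nn.Module):
    """Sketch: context_i = sum_j softmax_j(<query_i, value_j>) * value_j."""

    def forward(self, queries, values):
        # queries: N x To x H, values: N x Ti x H
        scores = torch.bmm(queries, values.transpose(1, 2))  # N x To x Ti
        distribution = F.softmax(scores, dim=2)  # each row sums to 1
        output = torch.bmm(distribution, values)  # N x To x H
        return output, distribution
```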
--------------------------------------------------------------------------------
/tools/kaldi-io-for-python.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaituoxu/Listen-Attend-Spell/b43ce63eaf68252fae2056bfcbbdef18c4be2340/tools/kaldi-io-for-python.tar.gz
--------------------------------------------------------------------------------
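The archive above bundles vesis84's kaldi-io-for-python, which the Makefile unpacks and symlinks to `src/utils/kaldi_io.py`. As a quick orientation (a sketch, not code from this repository; `feats.scp` is a placeholder path, e.g. one produced by the Kaldi feature-extraction stage of `egs/aishell/run.sh`), reading Kaldi features with it looks like:

```python
import kaldi_io

# Iterate over (utterance-id, feature-matrix) pairs from a Kaldi scp file.
for key, mat in kaldi_io.read_mat_scp('feats.scp'):
    print(key, mat.shape)  # mat is a numpy array: num_frames x feat_dim
```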